Device specific VFIO driver variant for Xe will implement VF migration.
Export everything that's needed for migration ops.
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
drivers/gpu/drm/xe/Makefile | 2 +
drivers/gpu/drm/xe/xe_sriov_vfio.c | 276 +++++++++++++++++++++++++++++
include/drm/intel/xe_sriov_vfio.h | 30 ++++
3 files changed, 308 insertions(+)
create mode 100644 drivers/gpu/drm/xe/xe_sriov_vfio.c
create mode 100644 include/drm/intel/xe_sriov_vfio.h
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index b848da79a4e18..0938b00a4c7fe 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -184,6 +184,8 @@ xe-$(CONFIG_PCI_IOV) += \
xe_sriov_pf_sysfs.o \
xe_tile_sriov_pf_debugfs.o
+xe-$(CONFIG_XE_VFIO_PCI) += xe_sriov_vfio.o
+
# include helpers for tests even when XE is built-in
ifdef CONFIG_DRM_XE_KUNIT_TEST
xe-y += tests/xe_kunit_helpers.o
diff --git a/drivers/gpu/drm/xe/xe_sriov_vfio.c b/drivers/gpu/drm/xe/xe_sriov_vfio.c
new file mode 100644
index 0000000000000..785f9a5027d10
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vfio.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <drm/intel/xe_sriov_vfio.h>
+#include <linux/cleanup.h>
+
+#include "xe_pci.h"
+#include "xe_pm.h"
+#include "xe_sriov_pf_control.h"
+#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_pf_migration.h"
+
+/**
+ * xe_sriov_vfio_get_pf() - Get PF &xe_device.
+ * @pdev: the VF &pci_dev device
+ *
+ * Return: pointer to PF &xe_device, NULL otherwise.
+ */
+struct xe_device *xe_sriov_vfio_get_pf(struct pci_dev *pdev)
+{
+ return xe_pci_to_pf_device(pdev);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_get_pf, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_migration_supported() - Check if migration is supported.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ *
+ * Return: true if migration is supported, false otherwise.
+ */
+bool xe_sriov_vfio_migration_supported(struct xe_device *xe)
+{
+ if (!IS_SRIOV_PF(xe))
+ return false;
+
+ return xe_sriov_pf_migration_supported(xe);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_migration_supported, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_wait_flr_done() - Wait for VF FLR completion.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * This function will wait until VF FLR is processed by PF on all tiles (or
+ * until timeout occurs).
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vfio_wait_flr_done(struct xe_device *xe, unsigned int vfid)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_control_wait_flr(xe, vfid);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_wait_flr_done, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_suspend_device() - Suspend VF.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * This function will pause VF on all tiles/GTs.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vfio_suspend_device(struct xe_device *xe, unsigned int vfid)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_control_pause_vf(xe, vfid);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_suspend_device, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_resume_device() - Resume VF.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * This function will resume VF on all tiles.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vfio_resume_device(struct xe_device *xe, unsigned int vfid)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_control_resume_vf(xe, vfid);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_resume_device, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_stop_copy_enter() - Initiate a VF device migration data save.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vfio_stop_copy_enter(struct xe_device *xe, unsigned int vfid)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_control_trigger_save_vf(xe, vfid);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_stop_copy_enter, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_stop_copy_exit() - Finish a VF device migration data save.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vfio_stop_copy_exit(struct xe_device *xe, unsigned int vfid)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_control_finish_save_vf(xe, vfid);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_stop_copy_exit, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_resume_data_enter() - Initiate a VF device migration data restore.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vfio_resume_data_enter(struct xe_device *xe, unsigned int vfid)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_control_trigger_restore_vf(xe, vfid);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_resume_data_enter, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_resume_data_exit() - Finish a VF device migration data restore.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vfio_resume_data_exit(struct xe_device *xe, unsigned int vfid)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_control_finish_restore_vf(xe, vfid);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_resume_data_exit, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_error() - Move VF device to error state.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * Reset is needed to move it out of error state.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vfio_error(struct xe_device *xe, unsigned int vfid)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_control_stop_vf(xe, vfid);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_error, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_data_read() - Read migration data from the VF device.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ * @buf: start address of userspace buffer
+ * @len: requested read size from userspace
+ *
+ * Return: number of bytes that has been successfully read,
+ * 0 if no more migration data is available, -errno on failure.
+ */
+ssize_t xe_sriov_vfio_data_read(struct xe_device *xe, unsigned int vfid,
+ char __user *buf, size_t len)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_migration_read(xe, vfid, buf, len);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_data_read, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_data_write() - Write migration data to the VF device.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ * @buf: start address of userspace buffer
+ * @len: requested write size from userspace
+ *
+ * Return: number of bytes that has been successfully written, -errno on failure.
+ */
+ssize_t xe_sriov_vfio_data_write(struct xe_device *xe, unsigned int vfid,
+ const char __user *buf, size_t len)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_migration_write(xe, vfid, buf, len);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_data_write, "xe-vfio-pci");
+
+/**
+ * xe_sriov_vfio_stop_copy_size() - Get a size estimate of VF device migration data.
+ * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * Return: migration data size in bytes or a negative error code on failure.
+ */
+ssize_t xe_sriov_vfio_stop_copy_size(struct xe_device *xe, unsigned int vfid)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_migration_size(xe, vfid);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_stop_copy_size, "xe-vfio-pci");
diff --git a/include/drm/intel/xe_sriov_vfio.h b/include/drm/intel/xe_sriov_vfio.h
new file mode 100644
index 0000000000000..bcd7085a81c55
--- /dev/null
+++ b/include/drm/intel/xe_sriov_vfio.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_VFIO_H_
+#define _XE_SRIOV_VFIO_H_
+
+#include <linux/types.h>
+
+struct pci_dev;
+struct xe_device;
+
+struct xe_device *xe_sriov_vfio_get_pf(struct pci_dev *pdev);
+bool xe_sriov_vfio_migration_supported(struct xe_device *xe);
+int xe_sriov_vfio_wait_flr_done(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_vfio_suspend_device(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_vfio_resume_device(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_vfio_stop_copy_enter(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_vfio_stop_copy_exit(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_vfio_resume_data_enter(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_vfio_resume_data_exit(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_vfio_error(struct xe_device *xe, unsigned int vfid);
+ssize_t xe_sriov_vfio_data_read(struct xe_device *xe, unsigned int vfid,
+ char __user *buf, size_t len);
+ssize_t xe_sriov_vfio_data_write(struct xe_device *xe, unsigned int vfid,
+ const char __user *buf, size_t len);
+ssize_t xe_sriov_vfio_stop_copy_size(struct xe_device *xe, unsigned int vfid);
+
+#endif
--
2.51.2
On Tue, 25 Nov 2025 00:08:40 +0100
Michał Winiarski <michal.winiarski@intel.com> wrote:
> +/**
> + * xe_sriov_vfio_wait_flr_done() - Wait for VF FLR completion.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * This function will wait until VF FLR is processed by PF on all tiles (or
> + * until timeout occurs).
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_vfio_wait_flr_done(struct xe_device *xe, unsigned int vfid)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_control_wait_flr(xe, vfid);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_wait_flr_done, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_suspend_device() - Suspend VF.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * This function will pause VF on all tiles/GTs.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_vfio_suspend_device(struct xe_device *xe, unsigned int vfid)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_control_pause_vf(xe, vfid);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_suspend_device, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_resume_device() - Resume VF.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * This function will resume VF on all tiles.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_vfio_resume_device(struct xe_device *xe, unsigned int vfid)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_control_resume_vf(xe, vfid);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_resume_device, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_stop_copy_enter() - Initiate a VF device migration data save.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_vfio_stop_copy_enter(struct xe_device *xe, unsigned int vfid)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_control_trigger_save_vf(xe, vfid);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_stop_copy_enter, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_stop_copy_exit() - Finish a VF device migration data save.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_vfio_stop_copy_exit(struct xe_device *xe, unsigned int vfid)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_control_finish_save_vf(xe, vfid);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_stop_copy_exit, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_resume_data_enter() - Initiate a VF device migration data restore.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_vfio_resume_data_enter(struct xe_device *xe, unsigned int vfid)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_control_trigger_restore_vf(xe, vfid);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_resume_data_enter, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_resume_data_exit() - Finish a VF device migration data restore.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_vfio_resume_data_exit(struct xe_device *xe, unsigned int vfid)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_control_finish_restore_vf(xe, vfid);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_resume_data_exit, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_error() - Move VF device to error state.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * Reset is needed to move it out of error state.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_vfio_error(struct xe_device *xe, unsigned int vfid)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_control_stop_vf(xe, vfid);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_error, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_data_read() - Read migration data from the VF device.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + * @buf: start address of userspace buffer
> + * @len: requested read size from userspace
> + *
> + * Return: number of bytes that has been successfully read,
> + * 0 if no more migration data is available, -errno on failure.
> + */
> +ssize_t xe_sriov_vfio_data_read(struct xe_device *xe, unsigned int vfid,
> + char __user *buf, size_t len)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_migration_read(xe, vfid, buf, len);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_data_read, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_data_write() - Write migration data to the VF device.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + * @buf: start address of userspace buffer
> + * @len: requested write size from userspace
> + *
> + * Return: number of bytes that has been successfully written, -errno on failure.
> + */
> +ssize_t xe_sriov_vfio_data_write(struct xe_device *xe, unsigned int vfid,
> + const char __user *buf, size_t len)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_migration_write(xe, vfid, buf, len);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_data_write, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_stop_copy_size() - Get a size estimate of VF device migration data.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * Return: migration data size in bytes or a negative error code on failure.
> + */
> +ssize_t xe_sriov_vfio_stop_copy_size(struct xe_device *xe, unsigned int vfid)
> +{
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> + return -EINVAL;
> +
> + guard(xe_pm_runtime_noresume)(xe);
> +
> + return xe_sriov_pf_migration_size(xe, vfid);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_stop_copy_size, "xe-vfio-pci");
The duplicated testing and identical structure of most of the above
functions suggests a helper, if not full on definition by macro.
Thanks,
Alex
On Tue, Nov 25, 2025 at 11:34:03AM -0700, Alex Williamson wrote:
> On Tue, 25 Nov 2025 00:08:40 +0100
> Michał Winiarski <michal.winiarski@intel.com> wrote:
> > +/**
> > + * xe_sriov_vfio_wait_flr_done() - Wait for VF FLR completion.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + *
> > + * This function will wait until VF FLR is processed by PF on all tiles (or
> > + * until timeout occurs).
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_sriov_vfio_wait_flr_done(struct xe_device *xe, unsigned int vfid)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_control_wait_flr(xe, vfid);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_wait_flr_done, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_suspend_device() - Suspend VF.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + *
> > + * This function will pause VF on all tiles/GTs.
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_sriov_vfio_suspend_device(struct xe_device *xe, unsigned int vfid)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_control_pause_vf(xe, vfid);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_suspend_device, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_resume_device() - Resume VF.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + *
> > + * This function will resume VF on all tiles.
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_sriov_vfio_resume_device(struct xe_device *xe, unsigned int vfid)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_control_resume_vf(xe, vfid);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_resume_device, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_stop_copy_enter() - Initiate a VF device migration data save.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_sriov_vfio_stop_copy_enter(struct xe_device *xe, unsigned int vfid)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_control_trigger_save_vf(xe, vfid);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_stop_copy_enter, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_stop_copy_exit() - Finish a VF device migration data save.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_sriov_vfio_stop_copy_exit(struct xe_device *xe, unsigned int vfid)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_control_finish_save_vf(xe, vfid);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_stop_copy_exit, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_resume_data_enter() - Initiate a VF device migration data restore.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_sriov_vfio_resume_data_enter(struct xe_device *xe, unsigned int vfid)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_control_trigger_restore_vf(xe, vfid);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_resume_data_enter, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_resume_data_exit() - Finish a VF device migration data restore.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_sriov_vfio_resume_data_exit(struct xe_device *xe, unsigned int vfid)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_control_finish_restore_vf(xe, vfid);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_resume_data_exit, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_error() - Move VF device to error state.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + *
> > + * Reset is needed to move it out of error state.
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_sriov_vfio_error(struct xe_device *xe, unsigned int vfid)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_control_stop_vf(xe, vfid);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_error, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_data_read() - Read migration data from the VF device.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + * @buf: start address of userspace buffer
> > + * @len: requested read size from userspace
> > + *
> > + * Return: number of bytes that has been successfully read,
> > + * 0 if no more migration data is available, -errno on failure.
> > + */
> > +ssize_t xe_sriov_vfio_data_read(struct xe_device *xe, unsigned int vfid,
> > + char __user *buf, size_t len)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_migration_read(xe, vfid, buf, len);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_data_read, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_data_write() - Write migration data to the VF device.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + * @buf: start address of userspace buffer
> > + * @len: requested write size from userspace
> > + *
> > + * Return: number of bytes that has been successfully written, -errno on failure.
> > + */
> > +ssize_t xe_sriov_vfio_data_write(struct xe_device *xe, unsigned int vfid,
> > + const char __user *buf, size_t len)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_migration_write(xe, vfid, buf, len);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_data_write, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_stop_copy_size() - Get a size estimate of VF device migration data.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + * @vfid: the VF identifier (can't be 0)
> > + *
> > + * Return: migration data size in bytes or a negative error code on failure.
> > + */
> > +ssize_t xe_sriov_vfio_stop_copy_size(struct xe_device *xe, unsigned int vfid)
> > +{
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > + if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
> > + return -EINVAL;
> > +
> > + guard(xe_pm_runtime_noresume)(xe);
> > +
> > + return xe_sriov_pf_migration_size(xe, vfid);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_stop_copy_size, "xe-vfio-pci");
>
> The duplicated testing and identical structure of most of the above
> functions suggests a helper, if not full on definition by macro.
> Thanks,
>
> Alex
I'll convert it to use macro definition for everything except
xe_sriov_vfio_data_write/xe_sriov_vfio_data_read.
Thanks,
-Michał
On 11/25/2025 12:08 AM, Michał Winiarski wrote:
> Device specific VFIO driver variant for Xe will implement VF migration.
> Export everything that's needed for migration ops.
>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> ---
> drivers/gpu/drm/xe/Makefile | 2 +
> drivers/gpu/drm/xe/xe_sriov_vfio.c | 276 +++++++++++++++++++++++++++++
> include/drm/intel/xe_sriov_vfio.h | 30 ++++
> 3 files changed, 308 insertions(+)
> create mode 100644 drivers/gpu/drm/xe/xe_sriov_vfio.c
> create mode 100644 include/drm/intel/xe_sriov_vfio.h
>
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index b848da79a4e18..0938b00a4c7fe 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -184,6 +184,8 @@ xe-$(CONFIG_PCI_IOV) += \
> xe_sriov_pf_sysfs.o \
> xe_tile_sriov_pf_debugfs.o
>
> +xe-$(CONFIG_XE_VFIO_PCI) += xe_sriov_vfio.o
hmm, shouldn't we also check for CONFIG_PCI_IOV ?
otherwise, some PF functions might not be available
or there some other implicit rule in Kconfig?
> +
> # include helpers for tests even when XE is built-in
> ifdef CONFIG_DRM_XE_KUNIT_TEST
> xe-y += tests/xe_kunit_helpers.o
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vfio.c b/drivers/gpu/drm/xe/xe_sriov_vfio.c
> new file mode 100644
> index 0000000000000..785f9a5027d10
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_sriov_vfio.c
> @@ -0,0 +1,276 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#include <drm/intel/xe_sriov_vfio.h>
> +#include <linux/cleanup.h>
> +
> +#include "xe_pci.h"
> +#include "xe_pm.h"
> +#include "xe_sriov_pf_control.h"
> +#include "xe_sriov_pf_helpers.h"
> +#include "xe_sriov_pf_migration.h"
> +
> +/**
> + * xe_sriov_vfio_get_pf() - Get PF &xe_device.
> + * @pdev: the VF &pci_dev device
> + *
> + * Return: pointer to PF &xe_device, NULL otherwise.
> + */
> +struct xe_device *xe_sriov_vfio_get_pf(struct pci_dev *pdev)
> +{
> + return xe_pci_to_pf_device(pdev);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_get_pf, "xe-vfio-pci");
> +
> +/**
> + * xe_sriov_vfio_migration_supported() - Check if migration is supported.
> + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> + *
> + * Return: true if migration is supported, false otherwise.
> + */
> +bool xe_sriov_vfio_migration_supported(struct xe_device *xe)
> +{
hmm, I'm wondering if maybe we should also check for NULL xe in all those
functions, as above helper function might return NULL in some unlikely case
but maybe this is too defensive
> + if (!IS_SRIOV_PF(xe))
> + return -EPERM;
> +
> + return xe_sriov_pf_migration_supported(xe);
> +}
> +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_migration_supported, "xe-vfio-pci");
> +
everything else lgtm, so:
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
On Tue, Nov 25, 2025 at 03:38:17PM +0100, Michal Wajdeczko wrote:
>
>
> On 11/25/2025 12:08 AM, Michał Winiarski wrote:
> > Device specific VFIO driver variant for Xe will implement VF migration.
> > Export everything that's needed for migration ops.
> >
> > Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> > ---
> > drivers/gpu/drm/xe/Makefile | 2 +
> > drivers/gpu/drm/xe/xe_sriov_vfio.c | 276 +++++++++++++++++++++++++++++
> > include/drm/intel/xe_sriov_vfio.h | 30 ++++
> > 3 files changed, 308 insertions(+)
> > create mode 100644 drivers/gpu/drm/xe/xe_sriov_vfio.c
> > create mode 100644 include/drm/intel/xe_sriov_vfio.h
> >
> > diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> > index b848da79a4e18..0938b00a4c7fe 100644
> > --- a/drivers/gpu/drm/xe/Makefile
> > +++ b/drivers/gpu/drm/xe/Makefile
> > @@ -184,6 +184,8 @@ xe-$(CONFIG_PCI_IOV) += \
> > xe_sriov_pf_sysfs.o \
> > xe_tile_sriov_pf_debugfs.o
> >
> > +xe-$(CONFIG_XE_VFIO_PCI) += xe_sriov_vfio.o
>
> hmm, shouldn't we also check for CONFIG_PCI_IOV ?
> otherwise, some PF functions might not be available
> or there some other implicit rule in Kconfig?
I did compile-test without CONFIG_PCI_IOV at some point, and it seems to
build fine for me.
But yeah - it should probably be pulled under CONFIG_PCI_IOV just like
other SR-IOV related files.
I'll do that (+ stubs for when CONFIG_PCI_IOV is disabled).
>
> > +
> > # include helpers for tests even when XE is built-in
> > ifdef CONFIG_DRM_XE_KUNIT_TEST
> > xe-y += tests/xe_kunit_helpers.o
> > diff --git a/drivers/gpu/drm/xe/xe_sriov_vfio.c b/drivers/gpu/drm/xe/xe_sriov_vfio.c
> > new file mode 100644
> > index 0000000000000..785f9a5027d10
> > --- /dev/null
> > +++ b/drivers/gpu/drm/xe/xe_sriov_vfio.c
> > @@ -0,0 +1,276 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2025 Intel Corporation
> > + */
> > +
> > +#include <drm/intel/xe_sriov_vfio.h>
> > +#include <linux/cleanup.h>
> > +
> > +#include "xe_pci.h"
> > +#include "xe_pm.h"
> > +#include "xe_sriov_pf_control.h"
> > +#include "xe_sriov_pf_helpers.h"
> > +#include "xe_sriov_pf_migration.h"
> > +
> > +/**
> > + * xe_sriov_vfio_get_pf() - Get PF &xe_device.
> > + * @pdev: the VF &pci_dev device
> > + *
> > + * Return: pointer to PF &xe_device, NULL otherwise.
> > + */
> > +struct xe_device *xe_sriov_vfio_get_pf(struct pci_dev *pdev)
> > +{
> > + return xe_pci_to_pf_device(pdev);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_get_pf, "xe-vfio-pci");
> > +
> > +/**
> > + * xe_sriov_vfio_migration_supported() - Check if migration is supported.
> > + * @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
> > + *
> > + * Return: true if migration is supported, false otherwise.
> > + */
> > +bool xe_sriov_vfio_migration_supported(struct xe_device *xe)
> > +{
>
> hmm, I'm wondering if maybe we should also check for NULL xe in all those
> functions, as above helper function might return NULL in some unlikely case
>
> but maybe this is too defensive
I think it's too defensive.
The xe_sriov_vfio_get_pf() is used in one place, and the return value is
checked. Worst case - not checking the return value will be caught early
as it will explode immediately with NULL-ptr-deref.
>
> > + if (!IS_SRIOV_PF(xe))
> > + return -EPERM;
> > +
> > + return xe_sriov_pf_migration_supported(xe);
> > +}
> > +EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_migration_supported, "xe-vfio-pci");
> > +
>
> everything else lgtm, so:
>
> Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
>
Thanks,
-Michał
© 2016 - 2025 Red Hat, Inc.