[PATCH v2 26/26] vfio/xe: Add vendor-specific vfio_pci driver for Intel graphics

Posted by Michał Winiarski 3 months, 2 weeks ago
In addition to generic VFIO PCI functionality, the driver implements the
VFIO migration uAPI, allowing userspace to enable migration for Intel
Graphics SR-IOV Virtual Functions.
The driver binds to the VF device and uses the API exposed by the Xe
driver bound to the PF device to control the VF device state and to
transfer the migration data.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
 MAINTAINERS                  |   7 +
 drivers/vfio/pci/Kconfig     |   2 +
 drivers/vfio/pci/Makefile    |   2 +
 drivers/vfio/pci/xe/Kconfig  |  12 +
 drivers/vfio/pci/xe/Makefile |   3 +
 drivers/vfio/pci/xe/main.c   | 470 +++++++++++++++++++++++++++++++++++
 6 files changed, 496 insertions(+)
 create mode 100644 drivers/vfio/pci/xe/Kconfig
 create mode 100644 drivers/vfio/pci/xe/Makefile
 create mode 100644 drivers/vfio/pci/xe/main.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 096fcca26dc76..255fcb01c98e1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -26976,6 +26976,13 @@ L:	virtualization@lists.linux.dev
 S:	Maintained
 F:	drivers/vfio/pci/virtio
 
+VFIO XE PCI DRIVER
+M:	Michał Winiarski <michal.winiarski@intel.com>
+L:	kvm@vger.kernel.org
+L:	intel-xe@lists.freedesktop.org
+S:	Supported
+F:	drivers/vfio/pci/xe
+
 VGA_SWITCHEROO
 R:	Lukas Wunner <lukas@wunner.de>
 S:	Maintained
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 2b0172f546652..c100f0ab87f2d 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -67,4 +67,6 @@ source "drivers/vfio/pci/nvgrace-gpu/Kconfig"
 
 source "drivers/vfio/pci/qat/Kconfig"
 
+source "drivers/vfio/pci/xe/Kconfig"
+
 endmenu
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index cf00c0a7e55c8..f5d46aa9347b9 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -19,3 +19,5 @@ obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio/
 obj-$(CONFIG_NVGRACE_GPU_VFIO_PCI) += nvgrace-gpu/
 
 obj-$(CONFIG_QAT_VFIO_PCI) += qat/
+
+obj-$(CONFIG_XE_VFIO_PCI) += xe/
diff --git a/drivers/vfio/pci/xe/Kconfig b/drivers/vfio/pci/xe/Kconfig
new file mode 100644
index 0000000000000..787be88268685
--- /dev/null
+++ b/drivers/vfio/pci/xe/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config XE_VFIO_PCI
+	tristate "VFIO support for Intel Graphics"
+	depends on DRM_XE
+	select VFIO_PCI_CORE
+	help
+	  This option enables a vendor-specific VFIO driver for Intel Graphics.
+	  In addition to generic VFIO PCI functionality, it implements VFIO
+	  migration uAPI allowing userspace to enable migration for
+	  Intel Graphics SR-IOV Virtual Functions supported by the Xe driver.
+
+	  If you don't know what to do here, say N.
diff --git a/drivers/vfio/pci/xe/Makefile b/drivers/vfio/pci/xe/Makefile
new file mode 100644
index 0000000000000..13aa0fd192cd4
--- /dev/null
+++ b/drivers/vfio/pci/xe/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_XE_VFIO_PCI) += xe-vfio-pci.o
+xe-vfio-pci-y := main.o
diff --git a/drivers/vfio/pci/xe/main.c b/drivers/vfio/pci/xe/main.c
new file mode 100644
index 0000000000000..bea992cdee6b0
--- /dev/null
+++ b/drivers/vfio/pci/xe/main.c
@@ -0,0 +1,470 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/vfio_pci_core.h>
+
+#include <drm/intel/xe_sriov_vfio.h>
+
+/**
+ * struct xe_vfio_pci_migration_file - file used for reading / writing migration data
+ */
+struct xe_vfio_pci_migration_file {
+	/** @filp: pointer to underlying &struct file */
+	struct file *filp;
+	/** @lock: serializes accesses to migration data */
+	struct mutex lock;
+	/** @xe_vdev: backpointer to &struct xe_vfio_pci_core_device */
+	struct xe_vfio_pci_core_device *xe_vdev;
+};
+
+/**
+ * struct xe_vfio_pci_core_device - xe-specific vfio_pci_core_device
+ *
+ * Top level structure of xe_vfio_pci.
+ */
+struct xe_vfio_pci_core_device {
+	/** @core_device: vendor-agnostic VFIO device */
+	struct vfio_pci_core_device core_device;
+
+	/** @mig_state: current device migration state */
+	enum vfio_device_mig_state mig_state;
+
+	/** @vfid: VF number used by PF, xe uses 1-based indexing for vfid */
+	unsigned int vfid;
+
+	/** @pf: pointer to driver_private of physical function */
+	struct pci_dev *pf;
+
+	/** @fd: &struct xe_vfio_pci_migration_file for userspace to read/write migration data */
+	struct xe_vfio_pci_migration_file *fd;
+};
+
+#define xe_vdev_to_dev(xe_vdev) (&(xe_vdev)->core_device.pdev->dev)
+#define xe_vdev_to_pdev(xe_vdev) ((xe_vdev)->core_device.pdev)
+
+static void xe_vfio_pci_disable_file(struct xe_vfio_pci_migration_file *migf)
+{
+	struct xe_vfio_pci_core_device *xe_vdev = migf->xe_vdev;
+
+	mutex_lock(&migf->lock);
+	xe_vdev->fd = NULL;
+	mutex_unlock(&migf->lock);
+}
+
+static void xe_vfio_pci_reset(struct xe_vfio_pci_core_device *xe_vdev)
+{
+	if (xe_vdev->fd)
+		xe_vfio_pci_disable_file(xe_vdev->fd);
+
+	xe_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+}
+
+static void xe_vfio_pci_reset_done(struct pci_dev *pdev)
+{
+	struct xe_vfio_pci_core_device *xe_vdev = pci_get_drvdata(pdev);
+	int ret;
+
+	ret = xe_sriov_vfio_wait_flr_done(xe_vdev->pf, xe_vdev->vfid);
+	if (ret)
+		dev_err(&pdev->dev, "Failed to wait for FLR: %d\n", ret);
+
+	xe_vfio_pci_reset(xe_vdev);
+}
+
+static const struct pci_error_handlers xe_vfio_pci_err_handlers = {
+	.reset_done = xe_vfio_pci_reset_done,
+};
+
+static int xe_vfio_pci_open_device(struct vfio_device *core_vdev)
+{
+	struct xe_vfio_pci_core_device *xe_vdev =
+		container_of(core_vdev, struct xe_vfio_pci_core_device, core_device.vdev);
+	struct vfio_pci_core_device *vdev = &xe_vdev->core_device;
+	int ret;
+
+	ret = vfio_pci_core_enable(vdev);
+	if (ret)
+		return ret;
+
+	vfio_pci_core_finish_enable(vdev);
+
+	return 0;
+}
+
+static int xe_vfio_pci_release_file(struct inode *inode, struct file *filp)
+{
+	struct xe_vfio_pci_migration_file *migf = filp->private_data;
+
+	xe_vfio_pci_disable_file(migf);
+	mutex_destroy(&migf->lock);
+	kfree(migf);
+
+	return 0;
+}
+
+static ssize_t xe_vfio_pci_save_read(struct file *filp, char __user *buf, size_t len, loff_t *pos)
+{
+	struct xe_vfio_pci_migration_file *migf = filp->private_data;
+	ssize_t ret;
+
+	if (pos)
+		return -ESPIPE;
+
+	mutex_lock(&migf->lock);
+	ret = xe_sriov_vfio_data_read(migf->xe_vdev->pf, migf->xe_vdev->vfid, buf, len);
+	mutex_unlock(&migf->lock);
+
+	return ret;
+}
+
+static const struct file_operations xe_vfio_pci_save_fops = {
+	.owner = THIS_MODULE,
+	.read = xe_vfio_pci_save_read,
+	.release = xe_vfio_pci_release_file,
+	.llseek = noop_llseek,
+};
+
+static ssize_t xe_vfio_pci_resume_write(struct file *filp, const char __user *buf,
+					size_t len, loff_t *pos)
+{
+	struct xe_vfio_pci_migration_file *migf = filp->private_data;
+	ssize_t ret;
+
+	if (pos)
+		return -ESPIPE;
+
+	mutex_lock(&migf->lock);
+	ret = xe_sriov_vfio_data_write(migf->xe_vdev->pf, migf->xe_vdev->vfid, buf, len);
+	mutex_unlock(&migf->lock);
+
+	return ret;
+}
+
+static const struct file_operations xe_vfio_pci_resume_fops = {
+	.owner = THIS_MODULE,
+	.write = xe_vfio_pci_resume_write,
+	.release = xe_vfio_pci_release_file,
+	.llseek = noop_llseek,
+};
+
+static const char *vfio_dev_state_str(u32 state)
+{
+	switch (state) {
+	case VFIO_DEVICE_STATE_RUNNING: return "running";
+	case VFIO_DEVICE_STATE_RUNNING_P2P: return "running_p2p";
+	case VFIO_DEVICE_STATE_STOP_COPY: return "stopcopy";
+	case VFIO_DEVICE_STATE_STOP: return "stop";
+	case VFIO_DEVICE_STATE_RESUMING: return "resuming";
+	case VFIO_DEVICE_STATE_ERROR: return "error";
+	default: return "";
+	}
+}
+
+enum xe_vfio_pci_file_type {
+	XE_VFIO_FILE_SAVE = 0,
+	XE_VFIO_FILE_RESUME,
+};
+
+static struct xe_vfio_pci_migration_file *
+xe_vfio_pci_alloc_file(struct xe_vfio_pci_core_device *xe_vdev,
+		       enum xe_vfio_pci_file_type type)
+{
+	struct xe_vfio_pci_migration_file *migf;
+	const struct file_operations *fops;
+	int flags;
+
+	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+	if (!migf)
+		return ERR_PTR(-ENOMEM);
+
+	fops = type == XE_VFIO_FILE_SAVE ? &xe_vfio_pci_save_fops : &xe_vfio_pci_resume_fops;
+	flags = type == XE_VFIO_FILE_SAVE ? O_RDONLY : O_WRONLY;
+	migf->filp = anon_inode_getfile("xe_vfio_mig", fops, migf, flags);
+	if (IS_ERR(migf->filp)) {
+		kfree(migf);
+		return ERR_CAST(migf->filp);
+	}
+
+	mutex_init(&migf->lock);
+	migf->xe_vdev = xe_vdev;
+	xe_vdev->fd = migf;
+
+	stream_open(migf->filp->f_inode, migf->filp);
+
+	return migf;
+}
+
+static struct file *
+xe_vfio_set_state(struct xe_vfio_pci_core_device *xe_vdev, u32 new)
+{
+	u32 cur = xe_vdev->mig_state;
+	int ret;
+
+	dev_dbg(xe_vdev_to_dev(xe_vdev),
+		"state: %s->%s\n", vfio_dev_state_str(cur), vfio_dev_state_str(new));
+
+	/*
+	 * "STOP" handling is reused for "RUNNING_P2P", as the device doesn't have the capability to
+	 * selectively block p2p DMA transfers.
+	 * The device is not processing new workload requests when the VF is stopped, and both
+	 * memory and MMIO communication channels are transferred to destination (where processing
+	 * will be resumed).
+	 */
+	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_STOP) ||
+	    (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
+		ret = xe_sriov_vfio_stop(xe_vdev->pf, xe_vdev->vfid);
+		if (ret)
+			goto err;
+
+		return NULL;
+	}
+
+	if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) ||
+	    (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P))
+		return NULL;
+
+	if ((cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING) ||
+	    (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING)) {
+		ret = xe_sriov_vfio_run(xe_vdev->pf, xe_vdev->vfid);
+		if (ret)
+			goto err;
+
+		return NULL;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
+		struct xe_vfio_pci_migration_file *migf;
+
+		migf = xe_vfio_pci_alloc_file(xe_vdev, XE_VFIO_FILE_SAVE);
+		if (IS_ERR(migf)) {
+			ret = PTR_ERR(migf);
+			goto err;
+		}
+
+		ret = xe_sriov_vfio_stop_copy_enter(xe_vdev->pf, xe_vdev->vfid);
+		if (ret) {
+			fput(migf->filp);
+			goto err;
+		}
+
+		return migf->filp;
+	}
+
+	if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) {
+		if (xe_vdev->fd)
+			xe_vfio_pci_disable_file(xe_vdev->fd);
+
+		xe_sriov_vfio_stop_copy_exit(xe_vdev->pf, xe_vdev->vfid);
+
+		return NULL;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
+		struct xe_vfio_pci_migration_file *migf;
+
+		migf = xe_vfio_pci_alloc_file(xe_vdev, XE_VFIO_FILE_RESUME);
+		if (IS_ERR(migf)) {
+			ret = PTR_ERR(migf);
+			goto err;
+		}
+
+		ret = xe_sriov_vfio_resume_enter(xe_vdev->pf, xe_vdev->vfid);
+		if (ret) {
+			fput(migf->filp);
+			goto err;
+		}
+
+		return migf->filp;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
+		if (xe_vdev->fd)
+			xe_vfio_pci_disable_file(xe_vdev->fd);
+
+		xe_sriov_vfio_resume_exit(xe_vdev->pf, xe_vdev->vfid);
+
+		return NULL;
+	}
+
+	if (new == VFIO_DEVICE_STATE_ERROR)
+		xe_sriov_vfio_error(xe_vdev->pf, xe_vdev->vfid);
+
+	WARN(true, "Unknown state transition %d->%d", cur, new);
+	return ERR_PTR(-EINVAL);
+
+err:
+	dev_dbg(xe_vdev_to_dev(xe_vdev),
+		"Failed to transition state: %s->%s err=%d\n",
+		vfio_dev_state_str(cur), vfio_dev_state_str(new), ret);
+	return ERR_PTR(ret);
+}
+
+static struct file *
+xe_vfio_pci_set_device_state(struct vfio_device *core_vdev,
+			     enum vfio_device_mig_state new_state)
+{
+	struct xe_vfio_pci_core_device *xe_vdev =
+		container_of(core_vdev, struct xe_vfio_pci_core_device, core_device.vdev);
+	enum vfio_device_mig_state next_state;
+	struct file *f = NULL;
+	int ret;
+
+	while (new_state != xe_vdev->mig_state) {
+		ret = vfio_mig_get_next_state(core_vdev, xe_vdev->mig_state,
+					      new_state, &next_state);
+		if (ret) {
+			f = ERR_PTR(ret);
+			break;
+		}
+		f = xe_vfio_set_state(xe_vdev, next_state);
+		if (IS_ERR(f))
+			break;
+
+		xe_vdev->mig_state = next_state;
+
+		/* Multiple state transitions with non-NULL file in the middle */
+		if (f && new_state != xe_vdev->mig_state) {
+			fput(f);
+			f = ERR_PTR(-EINVAL);
+			break;
+		}
+	}
+
+	return f;
+}
+
+static int xe_vfio_pci_get_device_state(struct vfio_device *core_vdev,
+					enum vfio_device_mig_state *curr_state)
+{
+	struct xe_vfio_pci_core_device *xe_vdev =
+		container_of(core_vdev, struct xe_vfio_pci_core_device, core_device.vdev);
+
+	*curr_state = xe_vdev->mig_state;
+
+	return 0;
+}
+
+static int xe_vfio_pci_get_data_size(struct vfio_device *vdev,
+				     unsigned long *stop_copy_length)
+{
+	struct xe_vfio_pci_core_device *xe_vdev =
+		container_of(vdev, struct xe_vfio_pci_core_device, core_device.vdev);
+
+	*stop_copy_length = xe_sriov_vfio_stop_copy_size(xe_vdev->pf, xe_vdev->vfid);
+
+	return 0;
+}
+
+static const struct vfio_migration_ops xe_vfio_pci_migration_ops = {
+	.migration_set_state = xe_vfio_pci_set_device_state,
+	.migration_get_state = xe_vfio_pci_get_device_state,
+	.migration_get_data_size = xe_vfio_pci_get_data_size,
+};
+
+static void xe_vfio_pci_migration_init(struct vfio_device *core_vdev)
+{
+	struct xe_vfio_pci_core_device *xe_vdev =
+		container_of(core_vdev, struct xe_vfio_pci_core_device, core_device.vdev);
+	struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
+
+	if (!xe_sriov_vfio_migration_supported(pdev->physfn))
+		return;
+
+	/* vfid starts from 1 for xe */
+	xe_vdev->vfid = pci_iov_vf_id(pdev) + 1;
+	xe_vdev->pf = pdev->physfn;
+
+	core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P;
+	core_vdev->mig_ops = &xe_vfio_pci_migration_ops;
+}
+
+static int xe_vfio_pci_init_dev(struct vfio_device *core_vdev)
+{
+	struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
+
+	if (pdev->is_virtfn && strcmp(pdev->physfn->dev.driver->name, "xe") == 0)
+		xe_vfio_pci_migration_init(core_vdev);
+
+	return vfio_pci_core_init_dev(core_vdev);
+}
+
+static const struct vfio_device_ops xe_vfio_pci_ops = {
+	.name = "xe-vfio-pci",
+	.init = xe_vfio_pci_init_dev,
+	.release = vfio_pci_core_release_dev,
+	.open_device = xe_vfio_pci_open_device,
+	.close_device = vfio_pci_core_close_device,
+	.ioctl = vfio_pci_core_ioctl,
+	.device_feature = vfio_pci_core_ioctl_feature,
+	.read = vfio_pci_core_read,
+	.write = vfio_pci_core_write,
+	.mmap = vfio_pci_core_mmap,
+	.request = vfio_pci_core_request,
+	.match = vfio_pci_core_match,
+	.match_token_uuid = vfio_pci_core_match_token_uuid,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
+	.detach_ioas = vfio_iommufd_physical_detach_ioas,
+};
+
+static int xe_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct xe_vfio_pci_core_device *xe_vdev;
+	int ret;
+
+	xe_vdev = vfio_alloc_device(xe_vfio_pci_core_device, core_device.vdev, &pdev->dev,
+				    &xe_vfio_pci_ops);
+	if (IS_ERR(xe_vdev))
+		return PTR_ERR(xe_vdev);
+
+	dev_set_drvdata(&pdev->dev, &xe_vdev->core_device);
+
+	ret = vfio_pci_core_register_device(&xe_vdev->core_device);
+	if (ret) {
+		vfio_put_device(&xe_vdev->core_device.vdev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void xe_vfio_pci_remove(struct pci_dev *pdev)
+{
+	struct xe_vfio_pci_core_device *xe_vdev = pci_get_drvdata(pdev);
+
+	vfio_pci_core_unregister_device(&xe_vdev->core_device);
+	vfio_put_device(&xe_vdev->core_device.vdev);
+}
+
+static const struct pci_device_id xe_vfio_pci_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_ANY_ID),
+		.class = PCI_BASE_CLASS_DISPLAY << 16, .class_mask = 0xff << 16,
+		.override_only = PCI_ID_F_VFIO_DRIVER_OVERRIDE },
+	{}
+};
+MODULE_DEVICE_TABLE(pci, xe_vfio_pci_table);
+
+static struct pci_driver xe_vfio_pci_driver = {
+	.name = "xe-vfio-pci",
+	.id_table = xe_vfio_pci_table,
+	.probe = xe_vfio_pci_probe,
+	.remove = xe_vfio_pci_remove,
+	.err_handler = &xe_vfio_pci_err_handlers,
+	.driver_managed_dma = true,
+};
+module_pci_driver(xe_vfio_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("VFIO PCI driver with migration support for Intel Graphics");
-- 
2.50.1

RE: [PATCH v2 26/26] vfio/xe: Add vendor-specific vfio_pci driver for Intel graphics
Posted by Tian, Kevin 3 months, 2 weeks ago
> From: Winiarski, Michal <michal.winiarski@intel.com>
> Sent: Wednesday, October 22, 2025 6:42 AM
> To: Alex Williamson <alex.williamson@redhat.com>;

You may need to run get_maintainer.pl again. Alex just got a new mail
address.


RE: [PATCH v2 26/26] vfio/xe: Add vendor-specific vfio_pci driver for Intel graphics
Posted by Tian, Kevin 3 months, 2 weeks ago
> From: Winiarski, Michal <michal.winiarski@intel.com>
> Sent: Wednesday, October 22, 2025 6:42 AM
> +
> +/**
> + * struct xe_vfio_pci_migration_file - file used for reading / writing migration
> data
> + */

let's use the comment style in vfio, i.e. "/*" instead of "/**"

> +struct xe_vfio_pci_migration_file {
> +	/** @filp: pointer to underlying &struct file */
> +	struct file *filp;
> +	/** @lock: serializes accesses to migration data */
> +	struct mutex lock;
> +	/** @xe_vdev: backpointer to &struct xe_vfio_pci_core_device */
> +	struct xe_vfio_pci_core_device *xe_vdev;

above comments are obvious...

> +struct xe_vfio_pci_core_device {
> +	/** @core_device: vendor-agnostic VFIO device */
> +	struct vfio_pci_core_device core_device;
> +
> +	/** @mig_state: current device migration state */
> +	enum vfio_device_mig_state mig_state;
> +
> +	/** @vfid: VF number used by PF, xe uses 1-based indexing for vfid */
> +	unsigned int vfid;

is 1-based indexing a sw or hw requirement?

> +
> +	/** @pf: pointer to driver_private of physical function */
> +	struct pci_dev *pf;
> +
> +	/** @fd: &struct xe_vfio_pci_migration_file for userspace to read/write migration data */
> +	struct xe_vfio_pci_migration_file *fd;

s/fd/migf/, as 'fd' is integer in all other places

btw it's risky w/o a lock protecting the state transition. See the usage of
state_mutex in other migration drivers.

> +static void xe_vfio_pci_reset_done(struct pci_dev *pdev)
> +{
> +	struct xe_vfio_pci_core_device *xe_vdev = pci_get_drvdata(pdev);
> +	int ret;
> +
> +	ret = xe_sriov_vfio_wait_flr_done(xe_vdev->pf, xe_vdev->vfid);
> +	if (ret)
> +		dev_err(&pdev->dev, "Failed to wait for FLR: %d\n", ret);

why is there a device-specific wait for FLR done? I suppose it's already
covered by the PCI core...

> +
> +	xe_vfio_pci_reset(xe_vdev);
> +}
> +
> +static const struct pci_error_handlers xe_vfio_pci_err_handlers = {
> +	.reset_done = xe_vfio_pci_reset_done,
> +};

missing ".error_detected "

> +static struct xe_vfio_pci_migration_file *
> +xe_vfio_pci_alloc_file(struct xe_vfio_pci_core_device *xe_vdev,
> +		       enum xe_vfio_pci_file_type type)
> +{
> +	struct xe_vfio_pci_migration_file *migf;
> +	const struct file_operations *fops;
> +	int flags;
> +
> +	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
> +	if (!migf)
> +		return ERR_PTR(-ENOMEM);
> +
> +	fops = type == XE_VFIO_FILE_SAVE ? &xe_vfio_pci_save_fops : &xe_vfio_pci_resume_fops;
> +	flags = type == XE_VFIO_FILE_SAVE ? O_RDONLY : O_WRONLY;
> +	migf->filp = anon_inode_getfile("xe_vfio_mig", fops, migf, flags);
> +	if (IS_ERR(migf->filp)) {
> +		kfree(migf);
> +		return ERR_CAST(migf->filp);
> +	}
> +
> +	mutex_init(&migf->lock);
> +	migf->xe_vdev = xe_vdev;
> +	xe_vdev->fd = migf;
> +
> +	stream_open(migf->filp->f_inode, migf->filp);
> +
> +	return migf;

missing a get_file(). vfio core will do another fput() upon error.

see vfio_ioct_mig_return_fd()

> +}
> +
> +static struct file *
> +xe_vfio_set_state(struct xe_vfio_pci_core_device *xe_vdev, u32 new)
> +{
> +	u32 cur = xe_vdev->mig_state;
> +	int ret;
> +
> +	dev_dbg(xe_vdev_to_dev(xe_vdev),
> +		"state: %s->%s\n", vfio_dev_state_str(cur), vfio_dev_state_str(new));
> +
> +	/*
> +	 * "STOP" handling is reused for "RUNNING_P2P", as the device doesn't have the capability to
> +	 * selectively block p2p DMA transfers.
> +	 * The device is not processing new workload requests when the VF is stopped, and both
> +	 * memory and MMIO communication channels are transferred to destination (where processing
> +	 * will be resumed).
> +	 */
> +	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_STOP) ||

this is not required when P2P is supported. vfio_mig_get_next_state() will
find the right arc from RUNNING to RUNNING_P2P to STOP.

> +	    (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
> +		ret = xe_sriov_vfio_stop(xe_vdev->pf, xe_vdev->vfid);
> +		if (ret)
> +			goto err;
> +
> +		return NULL;
> +	}

better to align with other drivers, s/stop/suspend/ and s/run/resume/

> +
> +	if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) ||
> +	    (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P))
> +		return NULL;
> +
> +	if ((cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING) ||
> +	    (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING)) {
> +		ret = xe_sriov_vfio_run(xe_vdev->pf, xe_vdev->vfid);
> +		if (ret)
> +			goto err;
> +
> +		return NULL;
> +	}
> +
> +	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
> +		struct xe_vfio_pci_migration_file *migf;
> +
> +		migf = xe_vfio_pci_alloc_file(xe_vdev, XE_VFIO_FILE_SAVE);
> +		if (IS_ERR(migf)) {
> +			ret = PTR_ERR(migf);
> +			goto err;
> +		}
> +
> +		ret = xe_sriov_vfio_stop_copy_enter(xe_vdev->pf, xe_vdev->vfid);
> +		if (ret) {
> +			fput(migf->filp);
> +			goto err;
> +		}
> +
> +		return migf->filp;
> +	}
> +
> +	if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) {
> +		if (xe_vdev->fd)
> +			xe_vfio_pci_disable_file(xe_vdev->fd);
> +
> +		xe_sriov_vfio_stop_copy_exit(xe_vdev->pf, xe_vdev->vfid);
> +
> +		return NULL;
> +	}
> +
> +	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
> +		struct xe_vfio_pci_migration_file *migf;
> +
> +		migf = xe_vfio_pci_alloc_file(xe_vdev, XE_VFIO_FILE_RESUME);
> +		if (IS_ERR(migf)) {
> +			ret = PTR_ERR(migf);
> +			goto err;
> +		}
> +
> +		ret = xe_sriov_vfio_resume_enter(xe_vdev->pf, xe_vdev->vfid);
> +		if (ret) {
> +			fput(migf->filp);
> +			goto err;
> +		}
> +
> +		return migf->filp;
> +	}
> +
> +	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
> +		if (xe_vdev->fd)
> +			xe_vfio_pci_disable_file(xe_vdev->fd);
> +
> +		xe_sriov_vfio_resume_exit(xe_vdev->pf, xe_vdev->vfid);
> +
> +		return NULL;
> +	}
> +
> +	if (new == VFIO_DEVICE_STATE_ERROR)
> +		xe_sriov_vfio_error(xe_vdev->pf, xe_vdev->vfid);

the ERROR state is not passed to the variant driver. You'll get -EINVAL
from vfio_mig_get_next_state(), so this is dead code.

If the pf driver needs to be notified, you could check the ret value instead.

> +static void xe_vfio_pci_migration_init(struct vfio_device *core_vdev)
> +{
> +	struct xe_vfio_pci_core_device *xe_vdev =
> +		container_of(core_vdev, struct xe_vfio_pci_core_device,
> core_device.vdev);
> +	struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
> +
> +	if (!xe_sriov_vfio_migration_supported(pdev->physfn))
> +		return;
> +
> +	/* vfid starts from 1 for xe */
> +	xe_vdev->vfid = pci_iov_vf_id(pdev) + 1;

pci_iov_vf_id() returns an error if it's not a VF, so it should be checked.

> +static int xe_vfio_pci_init_dev(struct vfio_device *core_vdev)
> +{
> +	struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
> +
> +	if (pdev->is_virtfn && strcmp(pdev->physfn->dev.driver->name, "xe") == 0)
> +		xe_vfio_pci_migration_init(core_vdev);

I didn't see the point of checking the driver name.

> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Intel Corporation");

please use the author name, as other drivers do

Re: [PATCH v2 26/26] vfio/xe: Add vendor-specific vfio_pci driver for Intel graphics
Posted by Winiarski, Michal 3 months, 1 week ago
On Mon, Oct 27, 2025 at 08:24:37AM +0100, Tian, Kevin wrote:
> > From: Winiarski, Michal <michal.winiarski@intel.com>
> > Sent: Wednesday, October 22, 2025 6:42 AM
> > +
> > +/**
> > + * struct xe_vfio_pci_migration_file - file used for reading / writing migration data
> > + */
> 
> let's use the comment style in vfio, i.e. "/*" instead of "/**"

It's a kernel-doc format (it's also used in vfio in some places).
I'll drop it though - because of the comments below.

> 
> > +struct xe_vfio_pci_migration_file {
> > +	/** @filp: pointer to underlying &struct file */
> > +	struct file *filp;
> > +	/** @lock: serializes accesses to migration data */
> > +	struct mutex lock;
> > +	/** @xe_vdev: backpointer to &struct xe_vfio_pci_core_device */
> > +	struct xe_vfio_pci_core_device *xe_vdev;
> 
> above comments are obvious...

Ok - will keep it simple and drop the obvious ones.

> 
> > +struct xe_vfio_pci_core_device {
> > +	/** @core_device: vendor-agnostic VFIO device */
> > +	struct vfio_pci_core_device core_device;
> > +
> > +	/** @mig_state: current device migration state */
> > +	enum vfio_device_mig_state mig_state;
> > +
> > +	/** @vfid: VF number used by PF, xe uses 1-based indexing for vfid */
> > +	unsigned int vfid;
> 
> is 1-based indexing a sw or hw requirement?

HW/FW components are using 1-based indexing.
I'll update the comment to state that.
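
e.g. (wording approximate):

	/* @vfid: VF number used by PF; HW/FW components use 1-based VF indexing */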

> 
> > +
> > +	/** @pf: pointer to driver_private of physical function */
> > +	struct pci_dev *pf;
> > +
> > +	/** @fd: &struct xe_vfio_pci_migration_file for userspace to read/write migration data */
> > +	struct xe_vfio_pci_migration_file *fd;
> 
> s/fd/migf/, as 'fd' is integer in all other places

Ok.

> 
> btw it's risky w/o a lock protecting the state transition. See the usage of
> state_mutex in other migration drivers.

It's a gap - I'll introduce a state_mutex.
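
Something along these lines (untested sketch; xe_vfio_pci_step_device_state_locked
is a made-up name for a helper holding the existing vfio_mig_get_next_state()
loop):

static struct file *
xe_vfio_pci_set_device_state(struct vfio_device *core_vdev,
			     enum vfio_device_mig_state new_state)
{
	struct xe_vfio_pci_core_device *xe_vdev =
		container_of(core_vdev, struct xe_vfio_pci_core_device,
			     core_device.vdev);
	struct file *f;

	/* Serialize state transitions, like state_mutex in other migration drivers */
	mutex_lock(&xe_vdev->state_mutex);
	f = xe_vfio_pci_step_device_state_locked(xe_vdev, new_state);
	mutex_unlock(&xe_vdev->state_mutex);

	return f;
}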

> 
> > +static void xe_vfio_pci_reset_done(struct pci_dev *pdev)
> > +{
> > +	struct xe_vfio_pci_core_device *xe_vdev = pci_get_drvdata(pdev);
> > +	int ret;
> > +
> > +	ret = xe_sriov_vfio_wait_flr_done(xe_vdev->pf, xe_vdev->vfid);
> > +	if (ret)
> > +		dev_err(&pdev->dev, "Failed to wait for FLR: %d\n", ret);
> 
> why is there a device-specific wait for FLR done? I suppose it's already
> covered by the PCI core...

No, unfortunately some of the state is cleared by the PF driver after
the PCI-level VF FLR is already done.
More info on that is available in patch 23:
"VF FLR requires additional processing done by PF driver.
The processing is done after FLR is already finished from PCIe
perspective.
In order to avoid a scenario where migration state transitions while
PF processing is still in progress, additional synchronization
point is needed.
Add a helper that will be used as part of VF driver struct
pci_error_handlers .reset_done() callback."

I'll add a comment, so that it's available here as well.
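
Roughly (wording approximate, based on the patch 23 description):

	/*
	 * VF FLR requires additional processing done by the PF driver, which
	 * happens after the FLR is already finished from the PCIe perspective.
	 * Wait for it here to avoid transitioning the migration state while
	 * the PF-side processing is still in progress.
	 */
	ret = xe_sriov_vfio_wait_flr_done(xe_vdev->pf, xe_vdev->vfid);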

> 
> > +
> > +	xe_vfio_pci_reset(xe_vdev);
> > +}
> > +
> > +static const struct pci_error_handlers xe_vfio_pci_err_handlers = {
> > +	.reset_done = xe_vfio_pci_reset_done,
> > +};
> 
> missing ".error_detected "

Ok. I'll use the generic one - vfio_pci_core_aer_err_detected().
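
i.e. something like:

static const struct pci_error_handlers xe_vfio_pci_err_handlers = {
	.reset_done = xe_vfio_pci_reset_done,
	.error_detected = vfio_pci_core_aer_err_detected,
};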

> 
> > +static struct xe_vfio_pci_migration_file *
> > +xe_vfio_pci_alloc_file(struct xe_vfio_pci_core_device *xe_vdev,
> > +		       enum xe_vfio_pci_file_type type)
> > +{
> > +	struct xe_vfio_pci_migration_file *migf;
> > +	const struct file_operations *fops;
> > +	int flags;
> > +
> > +	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
> > +	if (!migf)
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	fops = type == XE_VFIO_FILE_SAVE ? &xe_vfio_pci_save_fops : &xe_vfio_pci_resume_fops;
> > +	flags = type == XE_VFIO_FILE_SAVE ? O_RDONLY : O_WRONLY;
> > +	migf->filp = anon_inode_getfile("xe_vfio_mig", fops, migf, flags);
> > +	if (IS_ERR(migf->filp)) {
> > +		kfree(migf);
> > +		return ERR_CAST(migf->filp);
> > +	}
> > +
> > +	mutex_init(&migf->lock);
> > +	migf->xe_vdev = xe_vdev;
> > +	xe_vdev->fd = migf;
> > +
> > +	stream_open(migf->filp->f_inode, migf->filp);
> > +
> > +	return migf;
> 
> missing a get_file(). vfio core will do another fput() upon error.
> 
> see vfio_ioct_mig_return_fd()

Ok. I'll take a ref on both the STOP_COPY and RESUMING transitions.
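
e.g. (sketch, in xe_vfio_pci_alloc_file(); the disable/error paths then
need to drop this extra reference as well):

	mutex_init(&migf->lock);
	migf->xe_vdev = xe_vdev;
	/*
	 * xe_vdev->fd keeps its own pointer to the file - take a reference
	 * for it, since the vfio core does an fput() when returning the fd
	 * to userspace fails.
	 */
	get_file(migf->filp);
	xe_vdev->fd = migf;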

> 
> > +}
> > +
> > +static struct file *
> > +xe_vfio_set_state(struct xe_vfio_pci_core_device *xe_vdev, u32 new)
> > +{
> > +	u32 cur = xe_vdev->mig_state;
> > +	int ret;
> > +
> > +	dev_dbg(xe_vdev_to_dev(xe_vdev),
> > +		"state: %s->%s\n", vfio_dev_state_str(cur), vfio_dev_state_str(new));
> > +
> > +	/*
> > +	 * "STOP" handling is reused for "RUNNING_P2P", as the device doesn't have the capability to
> > +	 * selectively block p2p DMA transfers.
> > +	 * The device is not processing new workload requests when the VF is stopped, and both
> > +	 * memory and MMIO communication channels are transferred to destination (where processing
> > +	 * will be resumed).
> > +	 */
> > +	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_STOP) ||
> 
> this is not required when P2P is supported. vfio_mig_get_next_state() will
> find the right arc from RUNNING to RUNNING_P2P to STOP.

I'll remove both transitions (RUNNING -> STOP and STOP -> RUNNING).

> 
> > +	    (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
> > +		ret = xe_sriov_vfio_stop(xe_vdev->pf, xe_vdev->vfid);
> > +		if (ret)
> > +			goto err;
> > +
> > +		return NULL;
> > +	}
> 
> better to align with other drivers, s/stop/suspend/ and s/run/resume/

This will collide with resume_enter / resume_exit for actual migration
data loading.
I'll use suspend_device / resume_device, and resume_data_enter /
resume_data_exit.

> 
> > +
> > +	if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) ||
> > +	    (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P))
> > +		return NULL;
> > +
> > +	if ((cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING) ||
> > +	    (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING)) {
> > +		ret = xe_sriov_vfio_run(xe_vdev->pf, xe_vdev->vfid);
> > +		if (ret)
> > +			goto err;
> > +
> > +		return NULL;
> > +	}
> > +
> > +	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
> > +		struct xe_vfio_pci_migration_file *migf;
> > +
> > +		migf = xe_vfio_pci_alloc_file(xe_vdev, XE_VFIO_FILE_SAVE);
> > +		if (IS_ERR(migf)) {
> > +			ret = PTR_ERR(migf);
> > +			goto err;
> > +		}
> > +
> > +		ret = xe_sriov_vfio_stop_copy_enter(xe_vdev->pf, xe_vdev->vfid);
> > +		if (ret) {
> > +			fput(migf->filp);
> > +			goto err;
> > +		}
> > +
> > +		return migf->filp;
> > +	}
> > +
> > +	if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) {
> > +		if (xe_vdev->fd)
> > +			xe_vfio_pci_disable_file(xe_vdev->fd);
> > +
> > +		xe_sriov_vfio_stop_copy_exit(xe_vdev->pf, xe_vdev->vfid);
> > +
> > +		return NULL;
> > +	}
> > +
> > +	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
> > +		struct xe_vfio_pci_migration_file *migf;
> > +
> > +		migf = xe_vfio_pci_alloc_file(xe_vdev, XE_VFIO_FILE_RESUME);
> > +		if (IS_ERR(migf)) {
> > +			ret = PTR_ERR(migf);
> > +			goto err;
> > +		}
> > +
> > +		ret = xe_sriov_vfio_resume_enter(xe_vdev->pf, xe_vdev->vfid);
> > +		if (ret) {
> > +			fput(migf->filp);
> > +			goto err;
> > +		}
> > +
> > +		return migf->filp;
> > +	}
> > +
> > +	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
> > +		if (xe_vdev->fd)
> > +			xe_vfio_pci_disable_file(xe_vdev->fd);
> > +
> > +		xe_sriov_vfio_resume_exit(xe_vdev->pf, xe_vdev->vfid);
> > +
> > +		return NULL;
> > +	}
> > +
> > +	if (new == VFIO_DEVICE_STATE_ERROR)
> > +		xe_sriov_vfio_error(xe_vdev->pf, xe_vdev->vfid);
> 
> the ERROR state is not passed to the variant driver. You'll get -EINVAL
> from vfio_mig_get_next_state(), so this is dead code.
> 
> If the pf driver needs to be notified, you could check the ret value instead.

Ok. I'll do that instead.
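
e.g. (sketch, in the xe_vfio_pci_set_device_state() loop):

	f = xe_vfio_set_state(xe_vdev, next_state);
	if (IS_ERR(f)) {
		/* Let the PF driver know the VF is in an unknown state */
		xe_sriov_vfio_error(xe_vdev->pf, xe_vdev->vfid);
		break;
	}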

> 
> > +static void xe_vfio_pci_migration_init(struct vfio_device *core_vdev)
> > +{
> > +	struct xe_vfio_pci_core_device *xe_vdev =
> > +		container_of(core_vdev, struct xe_vfio_pci_core_device, core_device.vdev);
> > +	struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
> > +
> > +	if (!xe_sriov_vfio_migration_supported(pdev->physfn))
> > +		return;
> > +
> > +	/* vfid starts from 1 for xe */
> > +	xe_vdev->vfid = pci_iov_vf_id(pdev) + 1;
> 
> pci_iov_vf_id() returns an error if it's not a VF, so it should be checked.

Ok.
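
e.g. (sketch):

	int vfid = pci_iov_vf_id(pdev);

	if (vfid < 0)
		return;

	/* vfid starts from 1 for xe */
	xe_vdev->vfid = vfid + 1;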

> 
> > +static int xe_vfio_pci_init_dev(struct vfio_device *core_vdev)
> > +{
> > +	struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
> > +
> > +	if (pdev->is_virtfn && strcmp(pdev->physfn->dev.driver->name, "xe") == 0)
> > +		xe_vfio_pci_migration_init(core_vdev);
> 
> I didn't see the point of checking the driver name.

It will go away after transitioning to xe_pci_get_pf_xe_device().

> 
> > +
> > +MODULE_LICENSE("GPL");
> > +MODULE_AUTHOR("Intel Corporation");
> 
> please use the author name, as other drivers do

Ok.

Thanks,
-Michał
Re: [PATCH v2 26/26] vfio/xe: Add vendor-specific vfio_pci driver for Intel graphics
Posted by Christoph Hellwig 3 months, 2 weeks ago
There is absolutely nothing vendor-specific here, it is a device variant
driver.  In fact in Linux basically nothing is ever vendor specific,
because vendor is not a concept that does matter in any practical sense
except for tiny details like the vendor ID as one of the IDs to match
on in device probing.

I have no idea why people keep trying to inject this term again and
again.
Re: [PATCH v2 26/26] vfio/xe: Add vendor-specific vfio_pci driver for Intel graphics
Posted by Michał Winiarski 3 months, 2 weeks ago
On Wed, Oct 22, 2025 at 12:12:01AM -0700, Christoph Hellwig wrote:
> There is absolutely nothing vendor-specific here, it is a device variant
> driver.  In fact in Linux basically nothing is ever vendor specific,
> because vendor is not a concept that does matter in any practical sense
> except for tiny details like the vendor ID as one of the IDs to match
> on in device probing.
> 
> I have no idea why people keep trying to inject this term again and
> again.

Hi,

The reasoning was that in this case we're matching vendor ID + class
combination to match all Intel GPUs, and not just selected device ID,
but I get your point.

Let me replace it with "device specific" to follow the VFIO documentation.

Thanks,
-Michał
Re: [PATCH v2 26/26] vfio/xe: Add vendor-specific vfio_pci driver for Intel graphics
Posted by Christoph Hellwig 3 months, 2 weeks ago
On Wed, Oct 22, 2025 at 10:52:34AM +0200, Michał Winiarski wrote:
> On Wed, Oct 22, 2025 at 12:12:01AM -0700, Christoph Hellwig wrote:
> > There is absolutely nothing vendor-specific here, it is a device variant
> > driver.  In fact in Linux basically nothing is ever vendor specific,
> > because vendor is not a concept that does matter in any practical sense
> > except for tiny details like the vendor ID as one of the IDs to match
> > on in device probing.
> > 
> > I have no idea why people keep trying to inject this term again and
> > again.
> 
> Hi,
> 
> The reasoning was that in this case we're matching vendor ID + class
> combination to match all Intel GPUs, and not just selected device ID,
> but I get your point.

Which sounds like a really bad idea.  Is this going to work on i810
devices?  Or the odd PowerVR-based parts?

Re: [PATCH v2 26/26] vfio/xe: Add vendor-specific vfio_pci driver for Intel graphics
Posted by Michał Winiarski 3 months, 2 weeks ago
On Wed, Oct 22, 2025 at 01:54:42AM -0700, Christoph Hellwig wrote:
> On Wed, Oct 22, 2025 at 10:52:34AM +0200, Michał Winiarski wrote:
> > On Wed, Oct 22, 2025 at 12:12:01AM -0700, Christoph Hellwig wrote:
> > > There is absolutely nothing vendor-specific here, it is a device variant
> > > driver.  In fact in Linux basically nothing is ever vendor specific,
> > > because vendor is not a concept that does matter in any practical sense
> > > except for tiny details like the vendor ID as one of the IDs to match
> > > on in device probing.
> > > 
> > > I have no idea why people keep trying to inject this term again and
> > > again.
> > 
> > Hi,
> > 
> > The reasoning was that in this case we're matching vendor ID + class
> > combination to match all Intel GPUs, and not just selected device ID,
> > but I get your point.
> 
> Which sounds like a really bad idea.  Is this going to work on i810
> devices?  Or the odd PowerVR-based parts?

It's using .override_only = PCI_ID_F_VFIO_DRIVER_OVERRIDE, so it only
matters if the user was already planning to override the regular driver
with the VFIO one (using the driver_override sysfs attribute).
So if it worked on i810 or other odd parts using regular vfio-pci, it
would work with xe-vfio-pci, as both are using the same underlying
functions provided by vfio-pci-core.
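
For reference, the override flow looks something like this (the BDF is just
an example):

  # echo xe-vfio-pci > /sys/bus/pci/devices/0000:03:00.1/driver_override
  # echo 0000:03:00.1 > /sys/bus/pci/drivers_probe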

-Michał
Re: [PATCH v2 26/26] vfio/xe: Add vendor-specific vfio_pci driver for Intel graphics
Posted by Jason Gunthorpe 3 months, 2 weeks ago
On Wed, Oct 22, 2025 at 11:12:05AM +0200, Michał Winiarski wrote:
> On Wed, Oct 22, 2025 at 01:54:42AM -0700, Christoph Hellwig wrote:
> > On Wed, Oct 22, 2025 at 10:52:34AM +0200, Michał Winiarski wrote:
> > > On Wed, Oct 22, 2025 at 12:12:01AM -0700, Christoph Hellwig wrote:
> > > > There is absolutely nothing vendor-specific here, it is a device variant
> > > > driver.  In fact in Linux basically nothing is ever vendor specific,
> > > > because vendor is not a concept that does matter in any practical sense
> > > > except for tiny details like the vendor ID as one of the IDs to match
> > > > on in device probing.
> > > > 
> > > > I have no idea why people keep trying to inject this term again and
> > > > again.
> > > 
> > > Hi,
> > > 
> > > The reasoning was that in this case we're matching vendor ID + class
> > > combination to match all Intel GPUs, and not just selected device ID,
> > > but I get your point.
> > 
> > Which sounds like a really bad idea.  Is this going to work on i810
> > devices?  Or the odd PowerVR-based parts?
> 
> It's using .override_only = PCI_ID_F_VFIO_DRIVER_OVERRIDE, so it only
> matters if the user was already planning to override the regular driver
> with the VFIO one (using the driver_override sysfs attribute).
> So if it worked on i810 or other odd parts using regular vfio-pci, it
> would work with xe-vfio-pci, as both are using the same underlying
> functions provided by vfio-pci-core.

I also would rather see you list the actual working PCI IDs :|

Claiming all class devices for a vendor_id is something only DRM
does..

Jason
Re: [PATCH v2 26/26] vfio/xe: Add vendor-specific vfio_pci driver for Intel graphics
Posted by Michał Winiarski 3 months, 2 weeks ago
On Wed, Oct 22, 2025 at 08:33:55AM -0300, Jason Gunthorpe wrote:
> On Wed, Oct 22, 2025 at 11:12:05AM +0200, Michał Winiarski wrote:
> > On Wed, Oct 22, 2025 at 01:54:42AM -0700, Christoph Hellwig wrote:
> > > On Wed, Oct 22, 2025 at 10:52:34AM +0200, Michał Winiarski wrote:
> > > > On Wed, Oct 22, 2025 at 12:12:01AM -0700, Christoph Hellwig wrote:
> > > > > There is absolutely nothing vendor-specific here, it is a device variant
> > > > > driver.  In fact in Linux basically nothing is ever vendor specific,
> > > > > because vendor is not a concept that does matter in any practical sense
> > > > > except for tiny details like the vendor ID as one of the IDs to match
> > > > > on in device probing.
> > > > > 
> > > > > I have no idea why people keep trying to inject this term again and
> > > > > again.
> > > > 
> > > > Hi,
> > > > 
> > > > The reasoning was that in this case we're matching vendor ID + class
> > > > combination to match all Intel GPUs, and not just selected device ID,
> > > > but I get your point.
> > > 
> > > Which sounds like a really bad idea.  Is this going to work on i810
> > > devices?  Or the odd PowerVR-based parts?
> > 
> > It's using .override_only = PCI_ID_F_VFIO_DRIVER_OVERRIDE, so it only
> > matters if the user was already planning to override the regular driver
> > with the VFIO one (using the driver_override sysfs attribute).
> > So if it worked on i810 or other odd parts using regular vfio-pci, it
> > would work with xe-vfio-pci, as both are using the same underlying
> > functions provided by vfio-pci-core.
> 
> I also would rather see you list the actual working PCI IDs :|
> 
> Claiming all class devices for a vendor_id is something only DRM
> does..

We already have all of the device IDs in include/drm/intel/pciids.h.
So it's just a matter of adding a helper that sets the override,
including that header, and using a subset of the IDs.

I'll do that instead of matching on class.
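
e.g. (sketch; INTEL_XE_VFIO_DEVICE is a made-up helper, and the table would
list all SR-IOV capable platforms supported by xe):

#define INTEL_XE_VFIO_DEVICE(id) \
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, (id)), \
	  .override_only = PCI_ID_F_VFIO_DRIVER_OVERRIDE }

static const struct pci_device_id xe_vfio_pci_table[] = {
	INTEL_PTL_IDS(INTEL_XE_VFIO_DEVICE),
	{}
};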

Thanks,
-Michał

> 
> Jason