[PATCH v2 03/26] drm/xe/pf: Add save/restore control state stubs and connect to debugfs

Michał Winiarski posted 26 patches 3 months, 2 weeks ago
There is a newer version of this series
[PATCH v2 03/26] drm/xe/pf: Add save/restore control state stubs and connect to debugfs
Posted by Michał Winiarski 3 months, 2 weeks ago
The states will be used by upcoming changes to produce (in case of save)
or consume (in case of restore) the VF migration data.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c   | 248 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h   |   6 +
 .../gpu/drm/xe/xe_gt_sriov_pf_control_types.h |  14 +
 drivers/gpu/drm/xe/xe_sriov_pf_control.c      |  96 +++++++
 drivers/gpu/drm/xe/xe_sriov_pf_control.h      |   4 +
 drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c      |  38 +++
 6 files changed, 406 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
index 2e6bd3d1fe1da..b770916e88e53 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -184,6 +184,12 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
 	CASE2STR(PAUSE_SAVE_GUC);
 	CASE2STR(PAUSE_FAILED);
 	CASE2STR(PAUSED);
+	CASE2STR(SAVE_WIP);
+	CASE2STR(SAVE_FAILED);
+	CASE2STR(SAVED);
+	CASE2STR(RESTORE_WIP);
+	CASE2STR(RESTORE_FAILED);
+	CASE2STR(RESTORED);
 	CASE2STR(RESUME_WIP);
 	CASE2STR(RESUME_SEND_RESUME);
 	CASE2STR(RESUME_FAILED);
@@ -208,6 +214,8 @@ static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
 	case XE_GT_SRIOV_STATE_FLR_WIP:
 	case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
 		return 5 * HZ;
+	case XE_GT_SRIOV_STATE_RESTORE_WIP:
+		return 20 * HZ;
 	default:
 		return HZ;
 	}
@@ -329,6 +337,8 @@ static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
+	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED);
+	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED);
 }
 
 #define pf_enter_vf_state_machine_bug(gt, vfid) ({	\
@@ -359,6 +369,8 @@ static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
 
 static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
 static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
+static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid);
+static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid);
 static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
 static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
 
@@ -380,6 +392,8 @@ static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
 
 		pf_exit_vf_flr_wip(gt, vfid);
 		pf_exit_vf_stop_wip(gt, vfid);
+		pf_exit_vf_save_wip(gt, vfid);
+		pf_exit_vf_restore_wip(gt, vfid);
 		pf_exit_vf_pause_wip(gt, vfid);
 		pf_exit_vf_resume_wip(gt, vfid);
 
@@ -399,6 +413,8 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
+	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
+	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
 	pf_exit_vf_mismatch(gt, vfid);
 	pf_exit_vf_wip(gt, vfid);
 }
@@ -675,6 +691,8 @@ static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
 {
 	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
+	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
 	pf_exit_vf_mismatch(gt, vfid);
 	pf_exit_vf_wip(gt, vfid);
 }
@@ -753,6 +771,16 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
 		return -EPERM;
 	}
 
+	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
+		xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid);
+		return -EBUSY;
+	}
+
+	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
+		xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid);
+		return -EBUSY;
+	}
+
 	if (!pf_enter_vf_resume_wip(gt, vfid)) {
 		xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
 		return -EALREADY;
@@ -776,6 +804,218 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
 	return -ECANCELED;
 }
 
+static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
+{
+	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP);
+}
+
+static void pf_enter_vf_saved(struct xe_gt *gt, unsigned int vfid)
+{
+	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED))
+		pf_enter_vf_state_machine_bug(gt, vfid);
+
+	xe_gt_sriov_dbg(gt, "VF%u saved!\n", vfid);
+
+	pf_exit_vf_mismatch(gt, vfid);
+	pf_exit_vf_wip(gt, vfid);
+	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+}
+
+static bool pf_handle_vf_save(struct xe_gt *gt, unsigned int vfid)
+{
+	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP))
+		return false;
+
+	pf_enter_vf_saved(gt, vfid);
+
+	return true;
+}
+
+static bool pf_enter_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
+{
+	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
+		pf_enter_vf_wip(gt, vfid);
+		pf_queue_vf(gt, vfid);
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * xe_gt_sriov_pf_control_trigger_save_vf() - Start an SR-IOV VF migration data save sequence.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid)
+{
+	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
+		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
+		return -EPERM;
+	}
+
+	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
+		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
+		return -EPERM;
+	}
+
+	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
+		xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid);
+		return -EBUSY;
+	}
+
+	if (!pf_enter_vf_save_wip(gt, vfid)) {
+		xe_gt_sriov_dbg(gt, "VF%u save already in progress!\n", vfid);
+		return -EALREADY;
+	}
+
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_control_finish_save_vf() - Complete a VF migration data save sequence.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid)
+{
+	if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED)) {
+		pf_enter_vf_mismatch(gt, vfid);
+		return -EIO;
+	}
+
+	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+
+	return 0;
+}
+
+static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid)
+{
+	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP);
+}
+
+static void pf_enter_vf_restored(struct xe_gt *gt, unsigned int vfid)
+{
+	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED))
+		pf_enter_vf_state_machine_bug(gt, vfid);
+
+	xe_gt_sriov_dbg(gt, "VF%u restored!\n", vfid);
+
+	pf_exit_vf_mismatch(gt, vfid);
+	pf_exit_vf_wip(gt, vfid);
+	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+}
+
+static bool pf_handle_vf_restore(struct xe_gt *gt, unsigned int vfid)
+{
+	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP))
+		return false;
+
+	pf_enter_vf_restored(gt, vfid);
+
+	return true;
+}
+
+static bool pf_enter_vf_restore_wip(struct xe_gt *gt, unsigned int vfid)
+{
+	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
+		pf_enter_vf_wip(gt, vfid);
+		pf_queue_vf(gt, vfid);
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * xe_gt_sriov_pf_control_trigger_restore_vf() - Start an SR-IOV VF migration data restore sequence.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid)
+{
+	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
+		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
+		return -EPERM;
+	}
+
+	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
+		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
+		return -EPERM;
+	}
+
+	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
+		xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid);
+		return -EBUSY;
+	}
+
+	if (!pf_enter_vf_restore_wip(gt, vfid)) {
+		xe_gt_sriov_dbg(gt, "VF%u restore already in progress!\n", vfid);
+		return -EALREADY;
+	}
+
+	return 0;
+}
+
+static int pf_wait_vf_restore_done(struct xe_gt *gt, unsigned int vfid)
+{
+	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESTORE_WIP);
+	int err;
+
+	err = pf_wait_vf_wip_done(gt, vfid, timeout);
+	if (err) {
+		xe_gt_sriov_notice(gt, "VF%u RESTORE didn't finish in %u ms (%pe)\n",
+				   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
+		return err;
+	}
+
+	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED))
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_control_finish_restore_vf() - Complete a VF migration data restore sequence.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid)
+{
+	int ret;
+
+	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
+		ret = pf_wait_vf_restore_done(gt, vfid);
+		if (ret)
+			return ret;
+	}
+
+	if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) {
+		pf_enter_vf_mismatch(gt, vfid);
+		return -EIO;
+	}
+
+	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+
+	return 0;
+}
+
 /**
  * DOC: The VF STOP state machine
  *
@@ -817,6 +1057,8 @@ static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
 
 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
 	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
+	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
 	pf_exit_vf_mismatch(gt, vfid);
 	pf_exit_vf_wip(gt, vfid);
 }
@@ -1461,6 +1703,12 @@ static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
 	if (pf_exit_vf_pause_save_guc(gt, vfid))
 		return true;
 
+	if (pf_handle_vf_save(gt, vfid))
+		return true;
+
+	if (pf_handle_vf_restore(gt, vfid))
+		return true;
+
 	if (pf_exit_vf_resume_send_resume(gt, vfid))
 		return true;
 
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
index 8a72ef3778d47..abc233f6302ed 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
@@ -14,8 +14,14 @@ struct xe_gt;
 int xe_gt_sriov_pf_control_init(struct xe_gt *gt);
 void xe_gt_sriov_pf_control_restart(struct xe_gt *gt);
 
+bool xe_gt_sriov_pf_control_check_vf_data_wip(struct xe_gt *gt, unsigned int vfid);
+
 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid);
 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid);
 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid);
 int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid);
 int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
index c80b7e77f1ad2..e113dc98b33ce 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
@@ -31,6 +31,12 @@
  * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state.
  * @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed.
  * @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused.
+ * @XE_GT_SRIOV_STATE_SAVE_WIP: indicates that VF save operation is in progress.
+ * @XE_GT_SRIOV_STATE_SAVE_FAILED: indicates that VF save operation has failed.
+ * @XE_GT_SRIOV_STATE_SAVED: indicates that VF data is saved.
+ * @XE_GT_SRIOV_STATE_RESTORE_WIP: indicates that VF restore operation is in progress.
+ * @XE_GT_SRIOV_STATE_RESTORE_FAILED: indicates that VF restore operation has failed.
+ * @XE_GT_SRIOV_STATE_RESTORED: indicates that VF data is restored.
  * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates the a VF resume operation is in progress.
  * @XE_GT_SRIOV_STATE_RESUME_SEND_RESUME: indicates that the PF is about to send RESUME command.
  * @XE_GT_SRIOV_STATE_RESUME_FAILED: indicates that a VF resume operation has failed.
@@ -63,6 +69,14 @@ enum xe_gt_sriov_control_bits {
 	XE_GT_SRIOV_STATE_PAUSE_FAILED,
 	XE_GT_SRIOV_STATE_PAUSED,
 
+	XE_GT_SRIOV_STATE_SAVE_WIP,
+	XE_GT_SRIOV_STATE_SAVE_FAILED,
+	XE_GT_SRIOV_STATE_SAVED,
+
+	XE_GT_SRIOV_STATE_RESTORE_WIP,
+	XE_GT_SRIOV_STATE_RESTORE_FAILED,
+	XE_GT_SRIOV_STATE_RESTORED,
+
 	XE_GT_SRIOV_STATE_RESUME_WIP,
 	XE_GT_SRIOV_STATE_RESUME_SEND_RESUME,
 	XE_GT_SRIOV_STATE_RESUME_FAILED,
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c
index 416d00a03fbb7..8d8a01faf5291 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c
@@ -149,3 +149,99 @@ int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid)
 
 	return 0;
 }
+
+/**
+ * xe_sriov_pf_control_trigger_save_vf - Start a VF migration data SAVE sequence on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int ret;
+
+	for_each_gt(gt, xe, id) {
+		ret = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * xe_sriov_pf_control_finish_save_vf - Complete a VF migration data SAVE sequence on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or the negative error code of the first GT that failed.
+ */
+int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int ret = 0; /* defined result even if the loop body never runs */
+
+	for_each_gt(gt, xe, id) {
+		ret = xe_gt_sriov_pf_control_finish_save_vf(gt, vfid);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * xe_sriov_pf_control_trigger_restore_vf - Start a VF migration data RESTORE sequence on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or the negative error code of the first GT that failed.
+ */
+int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int ret;
+
+	for_each_gt(gt, xe, id) {
+		ret = xe_gt_sriov_pf_control_trigger_restore_vf(gt, vfid);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * xe_sriov_pf_control_finish_restore_vf - Complete a VF migration data RESTORE sequence on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or the negative error code of the first GT that failed.
+ */
+int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int ret = 0; /* defined result even if the loop body never runs */
+
+	for_each_gt(gt, xe, id) {
+		ret = xe_gt_sriov_pf_control_finish_restore_vf(gt, vfid);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_sriov_pf_control.h
index 2d52d0ac1b28f..30318c1fba34e 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_control.h
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.h
@@ -13,5 +13,9 @@ int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid);
 int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid);
 int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid);
 int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
index a81aa05c55326..e0e6340c49106 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
@@ -136,11 +136,31 @@ static void pf_populate_pf(struct xe_device *xe, struct dentry *pfdent)
  *      │   │   ├── reset
  *      │   │   ├── resume
  *      │   │   ├── stop
+ *      │   │   ├── save
+ *      │   │   ├── restore
  *      │   │   :
  *      │   ├── vf2
  *      │   │   ├── ...
  */
 
+static int from_file_read_to_vf_call(struct seq_file *s,
+				     int (*call)(struct xe_device *, unsigned int))
+{
+	struct dentry *dent = file_dentry(s->file)->d_parent;
+	struct xe_device *xe = extract_xe(dent);
+	unsigned int vfid = extract_vfid(dent);
+	int ret;
+
+	xe_pm_runtime_get(xe);
+	ret = call(xe, vfid);
+	xe_pm_runtime_put(xe);
+
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
 static ssize_t from_file_write_to_vf_call(struct file *file, const char __user *userbuf,
 					  size_t count, loff_t *ppos,
 					  int (*call)(struct xe_device *, unsigned int))
@@ -179,10 +199,26 @@ static ssize_t OP##_write(struct file *file, const char __user *userbuf,	\
 }										\
 DEFINE_SHOW_STORE_ATTRIBUTE(OP)
 
+#define DEFINE_VF_CONTROL_ATTRIBUTE_RW(OP)					\
+static int OP##_show(struct seq_file *s, void *unused)				\
+{										\
+	return from_file_read_to_vf_call(s,					\
+					 xe_sriov_pf_control_finish_##OP);	\
+}										\
+static ssize_t OP##_write(struct file *file, const char __user *userbuf,	\
+			  size_t count, loff_t *ppos)				\
+{										\
+	return from_file_write_to_vf_call(file, userbuf, count, ppos,		\
+					  xe_sriov_pf_control_trigger_##OP);	\
+}										\
+DEFINE_SHOW_STORE_ATTRIBUTE(OP)
+
 DEFINE_VF_CONTROL_ATTRIBUTE(pause_vf);
 DEFINE_VF_CONTROL_ATTRIBUTE(resume_vf);
 DEFINE_VF_CONTROL_ATTRIBUTE(stop_vf);
 DEFINE_VF_CONTROL_ATTRIBUTE(reset_vf);
+DEFINE_VF_CONTROL_ATTRIBUTE_RW(save_vf);
+DEFINE_VF_CONTROL_ATTRIBUTE_RW(restore_vf);
 
 static void pf_populate_vf(struct xe_device *xe, struct dentry *vfdent)
 {
@@ -190,6 +226,8 @@ static void pf_populate_vf(struct xe_device *xe, struct dentry *vfdent)
 	debugfs_create_file("resume", 0200, vfdent, xe, &resume_vf_fops);
 	debugfs_create_file("stop", 0200, vfdent, xe, &stop_vf_fops);
 	debugfs_create_file("reset", 0200, vfdent, xe, &reset_vf_fops);
+	debugfs_create_file("save", 0600, vfdent, xe, &save_vf_fops);
+	debugfs_create_file("restore", 0600, vfdent, xe, &restore_vf_fops);
 }
 
 static void pf_populate_with_tiles(struct xe_device *xe, struct dentry *dent, unsigned int vfid)
-- 
2.50.1

RE: [PATCH v2 03/26] drm/xe/pf: Add save/restore control state stubs and connect to debugfs
Posted by Tian, Kevin 3 months, 2 weeks ago
> From: Winiarski, Michal <michal.winiarski@intel.com>
> Sent: Wednesday, October 22, 2025 6:41 AM
>
> +int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int
> vfid)

the prefix is too long. xe_gt_sriov_trigger_save_vf() or
xe_gt_trigger_save_vf() is sufficient. 
Re: [PATCH v2 03/26] drm/xe/pf: Add save/restore control state stubs and connect to debugfs
Posted by Michal Wajdeczko 3 months, 2 weeks ago

On 10/28/2025 4:06 AM, Tian, Kevin wrote:
>> From: Winiarski, Michal <michal.winiarski@intel.com>
>> Sent: Wednesday, October 22, 2025 6:41 AM
>>
>> +int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int
>> vfid)
> 
> the prefix is too long. xe_gt_sriov_trigger_save_vf() or
> xe_gt_trigger_save_vf() is sufficient. 

In the Xe driver, we name functions based on the sub-component name:

xe_sriov_vfio.c
	= xe|sriov|vfio
	= Xe driver | SR-IOV feature | VFIO interface

xe_sriov_pf_control.c
	= xe|sriov|pf|control
	= Xe driver | SR-IOV feature | PF specific | control

xe_gt_sriov_pf_control.c
	= xe|gt|sriov|pf|control
	= Xe driver | GT-related | SR-IOV feature | PF specific | control

and only functions from the xe|sriov|vfio component will be exported
for use by the xe vfio driver (hence the vfio tag in their names) and
other functions will be internal to the Xe driver
Re: [PATCH v2 03/26] drm/xe/pf: Add save/restore control state stubs and connect to debugfs
Posted by Michal Wajdeczko 3 months, 2 weeks ago

On 10/22/2025 12:41 AM, Michał Winiarski wrote:
> The states will be used by upcoming changes to produce (in case of save)
> or consume (in case of resume) the VF migration data.
> 
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c   | 248 ++++++++++++++++++
>  drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h   |   6 +
>  .../gpu/drm/xe/xe_gt_sriov_pf_control_types.h |  14 +
>  drivers/gpu/drm/xe/xe_sriov_pf_control.c      |  96 +++++++
>  drivers/gpu/drm/xe/xe_sriov_pf_control.h      |   4 +
>  drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c      |  38 +++
>  6 files changed, 406 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
> index 2e6bd3d1fe1da..b770916e88e53 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
> @@ -184,6 +184,12 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
>  	CASE2STR(PAUSE_SAVE_GUC);
>  	CASE2STR(PAUSE_FAILED);
>  	CASE2STR(PAUSED);
> +	CASE2STR(SAVE_WIP);
> +	CASE2STR(SAVE_FAILED);
> +	CASE2STR(SAVED);
> +	CASE2STR(RESTORE_WIP);
> +	CASE2STR(RESTORE_FAILED);
> +	CASE2STR(RESTORED);
>  	CASE2STR(RESUME_WIP);
>  	CASE2STR(RESUME_SEND_RESUME);
>  	CASE2STR(RESUME_FAILED);
> @@ -208,6 +214,8 @@ static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
>  	case XE_GT_SRIOV_STATE_FLR_WIP:
>  	case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
>  		return 5 * HZ;
> +	case XE_GT_SRIOV_STATE_RESTORE_WIP:
> +		return 20 * HZ;
>  	default:
>  		return HZ;
>  	}
> @@ -329,6 +337,8 @@ static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
>  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
>  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
>  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
> +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED);
> +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED);
>  }
>  
>  #define pf_enter_vf_state_machine_bug(gt, vfid) ({	\
> @@ -359,6 +369,8 @@ static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
>  
>  static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
>  static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
> +static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid);
> +static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid);
>  static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
>  static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
>  
> @@ -380,6 +392,8 @@ static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
>  
>  		pf_exit_vf_flr_wip(gt, vfid);
>  		pf_exit_vf_stop_wip(gt, vfid);
> +		pf_exit_vf_save_wip(gt, vfid);
> +		pf_exit_vf_restore_wip(gt, vfid);
>  		pf_exit_vf_pause_wip(gt, vfid);
>  		pf_exit_vf_resume_wip(gt, vfid);
>  
> @@ -399,6 +413,8 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
>  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
>  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
>  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
> +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
> +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
>  	pf_exit_vf_mismatch(gt, vfid);
>  	pf_exit_vf_wip(gt, vfid);
>  }
> @@ -675,6 +691,8 @@ static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
>  {
>  	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
>  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
> +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
>  	pf_exit_vf_mismatch(gt, vfid);
>  	pf_exit_vf_wip(gt, vfid);
>  }
> @@ -753,6 +771,16 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
>  		return -EPERM;
>  	}
>  
> +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
> +		xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid);
> +		return -EBUSY;
> +	}
> +
> +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
> +		xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid);
> +		return -EBUSY;
> +	}
> +
>  	if (!pf_enter_vf_resume_wip(gt, vfid)) {
>  		xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
>  		return -EALREADY;
> @@ -776,6 +804,218 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
>  	return -ECANCELED;
>  }
>  
> +static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
> +{
> +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP);
> +}
> +
> +static void pf_enter_vf_saved(struct xe_gt *gt, unsigned int vfid)
> +{
> +	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED))
> +		pf_enter_vf_state_machine_bug(gt, vfid);
> +
> +	xe_gt_sriov_dbg(gt, "VF%u saved!\n", vfid);

nit: you can move expect(PAUSED) here

> +
> +	pf_exit_vf_mismatch(gt, vfid);
> +	pf_exit_vf_wip(gt, vfid);
> +	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> +}
> +
> +static bool pf_handle_vf_save(struct xe_gt *gt, unsigned int vfid)
> +{
> +	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP))
> +		return false;
> +
> +	pf_enter_vf_saved(gt, vfid);
> +
> +	return true;
> +}
> +
> +static bool pf_enter_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
> +{
> +	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
> +		pf_enter_vf_wip(gt, vfid);
> +		pf_queue_vf(gt, vfid);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +/**
> + * xe_gt_sriov_pf_control_trigger_save_vf() - Start an SR-IOV VF migration data save sequence.
> + * @gt: the &xe_gt
> + * @vfid: the VF identifier
> + *
> + * This function is for PF only.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid)
> +{
> +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
> +		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
> +		return -EPERM;
> +	}
> +
> +	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
> +		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
> +		return -EPERM;
> +	}
> +
> +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
> +		xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid);
> +		return -EBUSY;
> +	}
> +
> +	if (!pf_enter_vf_save_wip(gt, vfid)) {
> +		xe_gt_sriov_dbg(gt, "VF%u save already in progress!\n", vfid);
> +		return -EALREADY;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * xe_gt_sriov_pf_control_finish_save_vf() - Complete a VF migration data save sequence.
> + * @gt: the &xe_gt
> + * @vfid: the VF identifier
> + *
> + * This function is for PF only.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid)
> +{
> +	if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED)) {
> +		pf_enter_vf_mismatch(gt, vfid);
> +		return -EIO;
> +	}
> +
> +	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> +
> +	return 0;
> +}
> +
> +static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid)
> +{
> +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP);
> +}
> +
> +static void pf_enter_vf_restored(struct xe_gt *gt, unsigned int vfid)
> +{
> +	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED))
> +		pf_enter_vf_state_machine_bug(gt, vfid);
> +
> +	xe_gt_sriov_dbg(gt, "VF%u restored!\n", vfid);
> +
> +	pf_exit_vf_mismatch(gt, vfid);
> +	pf_exit_vf_wip(gt, vfid);
> +	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> +}
> +
> +static bool pf_handle_vf_restore(struct xe_gt *gt, unsigned int vfid)
> +{
> +	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP))
> +		return false;
> +
> +	pf_enter_vf_restored(gt, vfid);
> +
> +	return true;
> +}
> +
> +static bool pf_enter_vf_restore_wip(struct xe_gt *gt, unsigned int vfid)
> +{
> +	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
> +		pf_enter_vf_wip(gt, vfid);
> +		pf_queue_vf(gt, vfid);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +/**
> + * xe_gt_sriov_pf_control_trigger restore_vf() - Start an SR-IOV VF migration data restore sequence.
> + * @gt: the &xe_gt
> + * @vfid: the VF identifier
> + *
> + * This function is for PF only.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid)
> +{
> +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
> +		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
> +		return -EPERM;
> +	}
> +
> +	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
> +		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
> +		return -EPERM;
> +	}
> +
> +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
> +		xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid);
> +		return -EBUSY;
> +	}
> +
> +	if (!pf_enter_vf_restore_wip(gt, vfid)) {
> +		xe_gt_sriov_dbg(gt, "VF%u restore already in progress!\n", vfid);
> +		return -EALREADY;
> +	}
> +
> +	return 0;
> +}
> +
> +static int pf_wait_vf_restore_done(struct xe_gt *gt, unsigned int vfid)
> +{
> +	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESTORE_WIP);
> +	int err;
> +
> +	err = pf_wait_vf_wip_done(gt, vfid, timeout);
> +	if (err) {
> +		xe_gt_sriov_notice(gt, "VF%u RESTORE didn't finish in %u ms (%pe)\n",
> +				   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
> +		return err;
> +	}
> +
> +	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED))
> +		return -EIO;
> +
> +	return 0;
> +}
> +
> +/**
> + * xe_gt_sriov_pf_control_finish_restore_vf() - Complete a VF migration data restore sequence.
> + * @gt: the &xe_gt
> + * @vfid: the VF identifier
> + *
> + * This function is for PF only.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid)
> +{
> +	int ret;
> +
> +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
> +		ret = pf_wait_vf_restore_done(gt, vfid);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) {
> +		pf_enter_vf_mismatch(gt, vfid);
> +		return -EIO;
> +	}
> +
> +	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> +
> +	return 0;
> +}
> +
>  /**
>   * DOC: The VF STOP state machine
>   *
> @@ -817,6 +1057,8 @@ static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
>  
>  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
>  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
> +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
>  	pf_exit_vf_mismatch(gt, vfid);
>  	pf_exit_vf_wip(gt, vfid);
>  }
> @@ -1461,6 +1703,12 @@ static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
>  	if (pf_exit_vf_pause_save_guc(gt, vfid))
>  		return true;
>  
> +	if (pf_handle_vf_save(gt, vfid))
> +		return true;
> +
> +	if (pf_handle_vf_restore(gt, vfid))
> +		return true;
> +
>  	if (pf_exit_vf_resume_send_resume(gt, vfid))
>  		return true;
>  
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
> index 8a72ef3778d47..abc233f6302ed 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
> @@ -14,8 +14,14 @@ struct xe_gt;
>  int xe_gt_sriov_pf_control_init(struct xe_gt *gt);
>  void xe_gt_sriov_pf_control_restart(struct xe_gt *gt);
>  
> +bool xe_gt_sriov_pf_control_check_vf_data_wip(struct xe_gt *gt, unsigned int vfid);
> +
>  int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid);
>  int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid);
> +int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid);
> +int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid);
> +int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid);
> +int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid);
>  int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid);
>  int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid);
>  int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync);
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
> index c80b7e77f1ad2..e113dc98b33ce 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
> @@ -31,6 +31,12 @@
>   * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state.
>   * @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed.
>   * @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused.
> + * @XE_GT_SRIOV_STATE_SAVE_WIP: indicates that VF save operation is in progress.
> + * @XE_GT_SRIOV_STATE_SAVE_FAILED: indicates that VF save operation has failed.
> + * @XE_GT_SRIOV_STATE_SAVED: indicates that VF data is saved.
> + * @XE_GT_SRIOV_STATE_RESTORE_WIP: indicates that VF restore operation is in progress.
> + * @XE_GT_SRIOV_STATE_RESTORE_FAILED: indicates that VF restore operation has failed.
> + * @XE_GT_SRIOV_STATE_RESTORED: indicates that VF data is restored.
>   * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates the a VF resume operation is in progress.
>   * @XE_GT_SRIOV_STATE_RESUME_SEND_RESUME: indicates that the PF is about to send RESUME command.
>   * @XE_GT_SRIOV_STATE_RESUME_FAILED: indicates that a VF resume operation has failed.
> @@ -63,6 +69,14 @@ enum xe_gt_sriov_control_bits {
>  	XE_GT_SRIOV_STATE_PAUSE_FAILED,
>  	XE_GT_SRIOV_STATE_PAUSED,
>  
> +	XE_GT_SRIOV_STATE_SAVE_WIP,
> +	XE_GT_SRIOV_STATE_SAVE_FAILED,
> +	XE_GT_SRIOV_STATE_SAVED,
> +
> +	XE_GT_SRIOV_STATE_RESTORE_WIP,
> +	XE_GT_SRIOV_STATE_RESTORE_FAILED,
> +	XE_GT_SRIOV_STATE_RESTORED,
> +
>  	XE_GT_SRIOV_STATE_RESUME_WIP,
>  	XE_GT_SRIOV_STATE_RESUME_SEND_RESUME,
>  	XE_GT_SRIOV_STATE_RESUME_FAILED,

It is easier to understand those states after patch 04/26, which adds the
diagrams, and while there are small, hard-to-avoid overlaps between 03/26 and
04/26, the patch itself LGTM, so

Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>

> diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c
> index 416d00a03fbb7..8d8a01faf5291 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c
> @@ -149,3 +149,99 @@ int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid)
>  
>  	return 0;
>  }
> +
> +/**
> + * xe_sriov_pf_control_trigger_save_vf - Start a VF migration data SAVE sequence on all GTs.
> + * @xe: the &xe_device
> + * @vfid: the VF identifier
> + *
> + * This function is for PF only.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid)
> +{
> +	struct xe_gt *gt;
> +	unsigned int id;
> +	int ret;
> +
> +	for_each_gt(gt, xe, id) {
> +		ret = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * xe_sriov_pf_control_finish_save_vf - Complete a VF migration data SAVE sequence on all GTs.
> + * @xe: the &xe_device
> + * @vfid: the VF identifier
> + *
> + * This function is for PF only.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid)
> +{
> +	struct xe_gt *gt;
> +	unsigned int id;
> +	int ret;
> +
> +	for_each_gt(gt, xe, id) {
> +		ret = xe_gt_sriov_pf_control_finish_save_vf(gt, vfid);
> +		if (ret)
> +			break;
> +	}
> +
> +	return ret;
> +}
> +
> +/**
> + * xe_sriov_pf_control_trigger_restore_vf - Start a VF migration data RESTORE sequence on all GTs.
> + * @xe: the &xe_device
> + * @vfid: the VF identifier
> + *
> + * This function is for PF only.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid)
> +{
> +	struct xe_gt *gt;
> +	unsigned int id;
> +	int ret;
> +
> +	for_each_gt(gt, xe, id) {
> +		ret = xe_gt_sriov_pf_control_trigger_restore_vf(gt, vfid);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return ret;
> +}
> +
> +/**
> + * xe_sriov_pf_control_finish_restore_vf - Complete a VF migration data RESTORE sequence on all GTs.
> + * @xe: the &xe_device
> + * @vfid: the VF identifier
> + *
> + * This function is for PF only.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid)
> +{
> +	struct xe_gt *gt;
> +	unsigned int id;
> +	int ret;
> +
> +	for_each_gt(gt, xe, id) {
> +		ret = xe_gt_sriov_pf_control_finish_restore_vf(gt, vfid);
> +		if (ret)
> +			break;
> +	}
> +
> +	return ret;
> +}
> diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_sriov_pf_control.h
> index 2d52d0ac1b28f..30318c1fba34e 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.h
> +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.h
> @@ -13,5 +13,9 @@ int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid);
>  int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid);
>  int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid);
>  int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid);
> +int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid);
> +int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid);
> +int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid);
> +int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid);
>  
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
> index a81aa05c55326..e0e6340c49106 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
> @@ -136,11 +136,31 @@ static void pf_populate_pf(struct xe_device *xe, struct dentry *pfdent)
>   *      │   │   ├── reset
>   *      │   │   ├── resume
>   *      │   │   ├── stop
> + *      │   │   ├── save
> + *      │   │   ├── restore
>   *      │   │   :
>   *      │   ├── vf2
>   *      │   │   ├── ...
>   */
>  
> +static int from_file_read_to_vf_call(struct seq_file *s,
> +				     int (*call)(struct xe_device *, unsigned int))
> +{
> +	struct dentry *dent = file_dentry(s->file)->d_parent;
> +	struct xe_device *xe = extract_xe(dent);
> +	unsigned int vfid = extract_vfid(dent);
> +	int ret;
> +
> +	xe_pm_runtime_get(xe);
> +	ret = call(xe, vfid);
> +	xe_pm_runtime_put(xe);
> +
> +	if (ret < 0)
> +		return ret;
> +
> +	return 0;
> +}
> +
>  static ssize_t from_file_write_to_vf_call(struct file *file, const char __user *userbuf,
>  					  size_t count, loff_t *ppos,
>  					  int (*call)(struct xe_device *, unsigned int))
> @@ -179,10 +199,26 @@ static ssize_t OP##_write(struct file *file, const char __user *userbuf,	\
>  }										\
>  DEFINE_SHOW_STORE_ATTRIBUTE(OP)
>  
> +#define DEFINE_VF_CONTROL_ATTRIBUTE_RW(OP)					\
> +static int OP##_show(struct seq_file *s, void *unused)				\
> +{										\
> +	return from_file_read_to_vf_call(s,					\
> +					 xe_sriov_pf_control_finish_##OP);	\
> +}										\
> +static ssize_t OP##_write(struct file *file, const char __user *userbuf,	\
> +			  size_t count, loff_t *ppos)				\
> +{										\
> +	return from_file_write_to_vf_call(file, userbuf, count, ppos,		\
> +					  xe_sriov_pf_control_trigger_##OP);	\
> +}										\
> +DEFINE_SHOW_STORE_ATTRIBUTE(OP)
> +
>  DEFINE_VF_CONTROL_ATTRIBUTE(pause_vf);
>  DEFINE_VF_CONTROL_ATTRIBUTE(resume_vf);
>  DEFINE_VF_CONTROL_ATTRIBUTE(stop_vf);
>  DEFINE_VF_CONTROL_ATTRIBUTE(reset_vf);
> +DEFINE_VF_CONTROL_ATTRIBUTE_RW(save_vf);
> +DEFINE_VF_CONTROL_ATTRIBUTE_RW(restore_vf);
>  
>  static void pf_populate_vf(struct xe_device *xe, struct dentry *vfdent)
>  {
> @@ -190,6 +226,8 @@ static void pf_populate_vf(struct xe_device *xe, struct dentry *vfdent)
>  	debugfs_create_file("resume", 0200, vfdent, xe, &resume_vf_fops);
>  	debugfs_create_file("stop", 0200, vfdent, xe, &stop_vf_fops);
>  	debugfs_create_file("reset", 0200, vfdent, xe, &reset_vf_fops);
> +	debugfs_create_file("save", 0600, vfdent, xe, &save_vf_fops);
> +	debugfs_create_file("restore", 0600, vfdent, xe, &restore_vf_fops);
>  }
>  
>  static void pf_populate_with_tiles(struct xe_device *xe, struct dentry *dent, unsigned int vfid)

Re: [PATCH v2 03/26] drm/xe/pf: Add save/restore control state stubs and connect to debugfs
Posted by Michał Winiarski 3 months, 2 weeks ago
On Thu, Oct 23, 2025 at 12:31:47AM +0200, Michal Wajdeczko wrote:
> 
> 
> On 10/22/2025 12:41 AM, Michał Winiarski wrote:
> > The states will be used by upcoming changes to produce (in case of save)
> > > or consume (in case of restore) the VF migration data.
> > 
> > Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c   | 248 ++++++++++++++++++
> >  drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h   |   6 +
> >  .../gpu/drm/xe/xe_gt_sriov_pf_control_types.h |  14 +
> >  drivers/gpu/drm/xe/xe_sriov_pf_control.c      |  96 +++++++
> >  drivers/gpu/drm/xe/xe_sriov_pf_control.h      |   4 +
> >  drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c      |  38 +++
> >  6 files changed, 406 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
> > index 2e6bd3d1fe1da..b770916e88e53 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
> > @@ -184,6 +184,12 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
> >  	CASE2STR(PAUSE_SAVE_GUC);
> >  	CASE2STR(PAUSE_FAILED);
> >  	CASE2STR(PAUSED);
> > +	CASE2STR(SAVE_WIP);
> > +	CASE2STR(SAVE_FAILED);
> > +	CASE2STR(SAVED);
> > +	CASE2STR(RESTORE_WIP);
> > +	CASE2STR(RESTORE_FAILED);
> > +	CASE2STR(RESTORED);
> >  	CASE2STR(RESUME_WIP);
> >  	CASE2STR(RESUME_SEND_RESUME);
> >  	CASE2STR(RESUME_FAILED);
> > @@ -208,6 +214,8 @@ static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
> >  	case XE_GT_SRIOV_STATE_FLR_WIP:
> >  	case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
> >  		return 5 * HZ;
> > +	case XE_GT_SRIOV_STATE_RESTORE_WIP:
> > +		return 20 * HZ;
> >  	default:
> >  		return HZ;
> >  	}
> > @@ -329,6 +337,8 @@ static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
> >  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
> >  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
> >  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
> > +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED);
> > +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED);
> >  }
> >  
> >  #define pf_enter_vf_state_machine_bug(gt, vfid) ({	\
> > @@ -359,6 +369,8 @@ static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
> >  
> >  static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
> >  static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
> > +static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid);
> > +static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid);
> >  static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
> >  static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
> >  
> > @@ -380,6 +392,8 @@ static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
> >  
> >  		pf_exit_vf_flr_wip(gt, vfid);
> >  		pf_exit_vf_stop_wip(gt, vfid);
> > +		pf_exit_vf_save_wip(gt, vfid);
> > +		pf_exit_vf_restore_wip(gt, vfid);
> >  		pf_exit_vf_pause_wip(gt, vfid);
> >  		pf_exit_vf_resume_wip(gt, vfid);
> >  
> > @@ -399,6 +413,8 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
> >  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> >  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
> >  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
> > +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
> > +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
> >  	pf_exit_vf_mismatch(gt, vfid);
> >  	pf_exit_vf_wip(gt, vfid);
> >  }
> > @@ -675,6 +691,8 @@ static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
> >  {
> >  	pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
> >  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> > +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
> > +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
> >  	pf_exit_vf_mismatch(gt, vfid);
> >  	pf_exit_vf_wip(gt, vfid);
> >  }
> > @@ -753,6 +771,16 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
> >  		return -EPERM;
> >  	}
> >  
> > +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
> > +		xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid);
> > +		return -EBUSY;
> > +	}
> > +
> > +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
> > +		xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid);
> > +		return -EBUSY;
> > +	}
> > +
> >  	if (!pf_enter_vf_resume_wip(gt, vfid)) {
> >  		xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
> >  		return -EALREADY;
> > @@ -776,6 +804,218 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
> >  	return -ECANCELED;
> >  }
> >  
> > +static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP);
> > +}
> > +
> > +static void pf_enter_vf_saved(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED))
> > +		pf_enter_vf_state_machine_bug(gt, vfid);
> > +
> > +	xe_gt_sriov_dbg(gt, "VF%u saved!\n", vfid);
> 
> nit: you can move expect(PAUSED) here

Ok.

> 
> > +
> > +	pf_exit_vf_mismatch(gt, vfid);
> > +	pf_exit_vf_wip(gt, vfid);
> > +	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> > +}
> > +
> > +static bool pf_handle_vf_save(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP))
> > +		return false;
> > +
> > +	pf_enter_vf_saved(gt, vfid);
> > +
> > +	return true;
> > +}
> > +
> > +static bool pf_enter_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
> > +		pf_enter_vf_wip(gt, vfid);
> > +		pf_queue_vf(gt, vfid);
> > +		return true;
> > +	}
> > +
> > +	return false;
> > +}
> > +
> > +/**
> > + * xe_gt_sriov_pf_control_trigger_save_vf() - Start an SR-IOV VF migration data save sequence.
> > + * @gt: the &xe_gt
> > + * @vfid: the VF identifier
> > + *
> > + * This function is for PF only.
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
> > +		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
> > +		return -EPERM;
> > +	}
> > +
> > +	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
> > +		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
> > +		return -EPERM;
> > +	}
> > +
> > +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
> > +		xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid);
> > +		return -EBUSY;
> > +	}
> > +
> > +	if (!pf_enter_vf_save_wip(gt, vfid)) {
> > +		xe_gt_sriov_dbg(gt, "VF%u save already in progress!\n", vfid);
> > +		return -EALREADY;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +/**
> > + * xe_gt_sriov_pf_control_finish_save_vf() - Complete a VF migration data save sequence.
> > + * @gt: the &xe_gt
> > + * @vfid: the VF identifier
> > + *
> > + * This function is for PF only.
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED)) {
> > +		pf_enter_vf_mismatch(gt, vfid);
> > +		return -EIO;
> > +	}
> > +
> > +	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> > +
> > +	return 0;
> > +}
> > +
> > +static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP);
> > +}
> > +
> > +static void pf_enter_vf_restored(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED))
> > +		pf_enter_vf_state_machine_bug(gt, vfid);
> > +
> > +	xe_gt_sriov_dbg(gt, "VF%u restored!\n", vfid);
> > +
> > +	pf_exit_vf_mismatch(gt, vfid);
> > +	pf_exit_vf_wip(gt, vfid);
> > +	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> > +}
> > +
> > +static bool pf_handle_vf_restore(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP))
> > +		return false;
> > +
> > +	pf_enter_vf_restored(gt, vfid);
> > +
> > +	return true;
> > +}
> > +
> > +static bool pf_enter_vf_restore_wip(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
> > +		pf_enter_vf_wip(gt, vfid);
> > +		pf_queue_vf(gt, vfid);
> > +		return true;
> > +	}
> > +
> > +	return false;
> > +}
> > +
> > +/**
> > + * xe_gt_sriov_pf_control_trigger_restore_vf() - Start an SR-IOV VF migration data restore sequence.
> > + * @gt: the &xe_gt
> > + * @vfid: the VF identifier
> > + *
> > + * This function is for PF only.
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
> > +		xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
> > +		return -EPERM;
> > +	}
> > +
> > +	if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
> > +		xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
> > +		return -EPERM;
> > +	}
> > +
> > +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
> > +		xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid);
> > +		return -EBUSY;
> > +	}
> > +
> > +	if (!pf_enter_vf_restore_wip(gt, vfid)) {
> > +		xe_gt_sriov_dbg(gt, "VF%u restore already in progress!\n", vfid);
> > +		return -EALREADY;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int pf_wait_vf_restore_done(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESTORE_WIP);
> > +	int err;
> > +
> > +	err = pf_wait_vf_wip_done(gt, vfid, timeout);
> > +	if (err) {
> > +		xe_gt_sriov_notice(gt, "VF%u RESTORE didn't finish in %u ms (%pe)\n",
> > +				   vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
> > +		return err;
> > +	}
> > +
> > +	if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED))
> > +		return -EIO;
> > +
> > +	return 0;
> > +}
> > +
> > +/**
> > + * xe_gt_sriov_pf_control_finish_restore_vf() - Complete a VF migration data restore sequence.
> > + * @gt: the &xe_gt
> > + * @vfid: the VF identifier
> > + *
> > + * This function is for PF only.
> > + *
> > + * Return: 0 on success or a negative error code on failure.
> > + */
> > +int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid)
> > +{
> > +	int ret;
> > +
> > +	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
> > +		ret = pf_wait_vf_restore_done(gt, vfid);
> > +		if (ret)
> > +			return ret;
> > +	}
> > +
> > +	if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) {
> > +		pf_enter_vf_mismatch(gt, vfid);
> > +		return -EIO;
> > +	}
> > +
> > +	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> > +
> > +	return 0;
> > +}
> > +
> >  /**
> >   * DOC: The VF STOP state machine
> >   *
> > @@ -817,6 +1057,8 @@ static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
> >  
> >  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
> >  	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
> > +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
> > +	pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
> >  	pf_exit_vf_mismatch(gt, vfid);
> >  	pf_exit_vf_wip(gt, vfid);
> >  }
> > @@ -1461,6 +1703,12 @@ static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
> >  	if (pf_exit_vf_pause_save_guc(gt, vfid))
> >  		return true;
> >  
> > +	if (pf_handle_vf_save(gt, vfid))
> > +		return true;
> > +
> > +	if (pf_handle_vf_restore(gt, vfid))
> > +		return true;
> > +
> >  	if (pf_exit_vf_resume_send_resume(gt, vfid))
> >  		return true;
> >  
> > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
> > index 8a72ef3778d47..abc233f6302ed 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
> > @@ -14,8 +14,14 @@ struct xe_gt;
> >  int xe_gt_sriov_pf_control_init(struct xe_gt *gt);
> >  void xe_gt_sriov_pf_control_restart(struct xe_gt *gt);
> >  
> > +bool xe_gt_sriov_pf_control_check_vf_data_wip(struct xe_gt *gt, unsigned int vfid);
> > +
> >  int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid);
> >  int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid);
> > +int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid);
> > +int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid);
> > +int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid);
> > +int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid);
> >  int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid);
> >  int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid);
> >  int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync);
> > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
> > index c80b7e77f1ad2..e113dc98b33ce 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
> > @@ -31,6 +31,12 @@
> >   * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state.
> >   * @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed.
> >   * @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused.
> > + * @XE_GT_SRIOV_STATE_SAVE_WIP: indicates that VF save operation is in progress.
> > + * @XE_GT_SRIOV_STATE_SAVE_FAILED: indicates that VF save operation has failed.
> > + * @XE_GT_SRIOV_STATE_SAVED: indicates that VF data is saved.
> > + * @XE_GT_SRIOV_STATE_RESTORE_WIP: indicates that VF restore operation is in progress.
> > + * @XE_GT_SRIOV_STATE_RESTORE_FAILED: indicates that VF restore operation has failed.
> > + * @XE_GT_SRIOV_STATE_RESTORED: indicates that VF data is restored.
> >   * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates the a VF resume operation is in progress.
> >   * @XE_GT_SRIOV_STATE_RESUME_SEND_RESUME: indicates that the PF is about to send RESUME command.
> >   * @XE_GT_SRIOV_STATE_RESUME_FAILED: indicates that a VF resume operation has failed.
> > @@ -63,6 +69,14 @@ enum xe_gt_sriov_control_bits {
> >  	XE_GT_SRIOV_STATE_PAUSE_FAILED,
> >  	XE_GT_SRIOV_STATE_PAUSED,
> >  
> > +	XE_GT_SRIOV_STATE_SAVE_WIP,
> > +	XE_GT_SRIOV_STATE_SAVE_FAILED,
> > +	XE_GT_SRIOV_STATE_SAVED,
> > +
> > +	XE_GT_SRIOV_STATE_RESTORE_WIP,
> > +	XE_GT_SRIOV_STATE_RESTORE_FAILED,
> > +	XE_GT_SRIOV_STATE_RESTORED,
> > +
> >  	XE_GT_SRIOV_STATE_RESUME_WIP,
> >  	XE_GT_SRIOV_STATE_RESUME_SEND_RESUME,
> >  	XE_GT_SRIOV_STATE_RESUME_FAILED,
> 
> It is easier to understand those states after patch 04/26, which adds the
> diagrams, and while there are small, hard-to-avoid overlaps between 03/26 and
> 04/26, the patch itself LGTM, so
> 
> Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>

Thanks,
-Michał