[PATCH v2 24/26] drm/xe/pf: Enable SR-IOV VF migration for PTL and BMG

Michał Winiarski posted 26 patches 3 months, 2 weeks ago
There is a newer version of this series
[PATCH v2 24/26] drm/xe/pf: Enable SR-IOV VF migration for PTL and BMG
Posted by Michał Winiarski 3 months, 2 weeks ago
All of the necessary building blocks are now in place for PTL and BMG to
support SR-IOV VF migration.
Enable the feature without the need to pass feature enabling debug flags
for those platforms.

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/xe/xe_device.h             | 5 +++++
 drivers/gpu/drm/xe/xe_device_types.h       | 2 ++
 drivers/gpu/drm/xe/xe_pci.c                | 8 ++++++--
 drivers/gpu/drm/xe/xe_pci_types.h          | 1 +
 drivers/gpu/drm/xe/xe_sriov_pf_migration.c | 4 +++-
 5 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 32cc6323b7f64..0c4404c78227c 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -152,6 +152,11 @@ static inline bool xe_device_has_sriov(struct xe_device *xe)
 	return xe->info.has_sriov;
 }
 
+static inline bool xe_device_has_sriov_vf_migration(struct xe_device *xe)
+{
+	return xe->info.has_sriov_vf_migration;
+}
+
 static inline bool xe_device_has_msix(struct xe_device *xe)
 {
 	return xe->irq.msix.nvec > 0;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 02c04ad7296e4..8973e17b9a359 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -311,6 +311,8 @@ struct xe_device {
 		u8 has_range_tlb_inval:1;
 		/** @info.has_sriov: Supports SR-IOV */
 		u8 has_sriov:1;
+		/** @info.has_sriov_vf_migration: Supports SR-IOV VF migration */
+		u8 has_sriov_vf_migration:1;
 		/** @info.has_usm: Device has unified shared memory support */
 		u8 has_usm:1;
 		/** @info.has_64bit_timestamp: Device supports 64-bit timestamps */
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index c3136141a9536..d4f9ee9d020b2 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -362,6 +362,7 @@ static const struct xe_device_desc bmg_desc = {
 	.has_heci_cscfi = 1,
 	.has_late_bind = true,
 	.has_sriov = true,
+	.has_sriov_vf_migration = true,
 	.max_gt_per_tile = 2,
 	.needs_scratch = true,
 	.subplatforms = (const struct xe_subplatform_desc[]) {
@@ -378,6 +379,7 @@ static const struct xe_device_desc ptl_desc = {
 	.has_display = true,
 	.has_flat_ccs = 1,
 	.has_sriov = true,
+	.has_sriov_vf_migration = true,
 	.max_gt_per_tile = 2,
 	.needs_scratch = true,
 	.needs_shared_vf_gt_wq = true,
@@ -657,6 +659,7 @@ static int xe_info_init_early(struct xe_device *xe,
 	xe->info.has_pxp = desc->has_pxp;
 	xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) &&
 		desc->has_sriov;
+	xe->info.has_sriov_vf_migration = desc->has_sriov_vf_migration;
 	xe->info.skip_guc_pc = desc->skip_guc_pc;
 	xe->info.skip_mtcfg = desc->skip_mtcfg;
 	xe->info.skip_pcode = desc->skip_pcode;
@@ -1020,9 +1023,10 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		xe_step_name(xe->info.step.media),
 		xe_step_name(xe->info.step.basedie));
 
-	drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s)\n",
+	drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s) (VF migration: %s)\n",
 		str_yes_no(xe_device_has_sriov(xe)),
-		xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+		xe_sriov_mode_to_string(xe_device_sriov_mode(xe)),
+		str_yes_no(xe_device_has_sriov_vf_migration(xe)));
 
 	err = xe_pm_init_early(xe);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index a4451bdc79fb3..40f158b3ac890 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -48,6 +48,7 @@ struct xe_device_desc {
 	u8 has_mbx_power_limits:1;
 	u8 has_pxp:1;
 	u8 has_sriov:1;
+	u8 has_sriov_vf_migration:1;
 	u8 needs_scratch:1;
 	u8 skip_guc_pc:1;
 	u8 skip_mtcfg:1;
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
index 88babec9c893e..a6cf3b57edba1 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
@@ -50,7 +50,9 @@ bool xe_sriov_pf_migration_supported(struct xe_device *xe)
 
 static bool pf_check_migration_support(struct xe_device *xe)
 {
-	/* XXX: for now this is for feature enabling only */
+	if (xe_device_has_sriov_vf_migration(xe))
+		return true;
+
 	return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
 }
 
-- 
2.50.1

Re: [PATCH v2 24/26] drm/xe/pf: Enable SR-IOV VF migration for PTL and BMG
Posted by Michal Wajdeczko 3 months, 2 weeks ago

On 10/22/2025 12:41 AM, Michał Winiarski wrote:
> All of the necessary building blocks are now in place for PTL and BMG to
> support SR-IOV VF migration.
> Enable the feature without the need to pass feature enabling debug flags
> for those platforms.
> 
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_device.h             | 5 +++++
>  drivers/gpu/drm/xe/xe_device_types.h       | 2 ++
>  drivers/gpu/drm/xe/xe_pci.c                | 8 ++++++--
>  drivers/gpu/drm/xe/xe_pci_types.h          | 1 +
>  drivers/gpu/drm/xe/xe_sriov_pf_migration.c | 4 +++-
>  5 files changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
> index 32cc6323b7f64..0c4404c78227c 100644
> --- a/drivers/gpu/drm/xe/xe_device.h
> +++ b/drivers/gpu/drm/xe/xe_device.h
> @@ -152,6 +152,11 @@ static inline bool xe_device_has_sriov(struct xe_device *xe)
>  	return xe->info.has_sriov;
>  }
>  
> +static inline bool xe_device_has_sriov_vf_migration(struct xe_device *xe)
> +{
> +	return xe->info.has_sriov_vf_migration;
> +}
> +
>  static inline bool xe_device_has_msix(struct xe_device *xe)
>  {
>  	return xe->irq.msix.nvec > 0;
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 02c04ad7296e4..8973e17b9a359 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -311,6 +311,8 @@ struct xe_device {
>  		u8 has_range_tlb_inval:1;
>  		/** @info.has_sriov: Supports SR-IOV */
>  		u8 has_sriov:1;
> +		/** @info.has_sriov_vf_migration: Supports SR-IOV VF migration */
> +		u8 has_sriov_vf_migration:1;
>  		/** @info.has_usm: Device has unified shared memory support */
>  		u8 has_usm:1;
>  		/** @info.has_64bit_timestamp: Device supports 64-bit timestamps */
> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> index c3136141a9536..d4f9ee9d020b2 100644
> --- a/drivers/gpu/drm/xe/xe_pci.c
> +++ b/drivers/gpu/drm/xe/xe_pci.c
> @@ -362,6 +362,7 @@ static const struct xe_device_desc bmg_desc = {
>  	.has_heci_cscfi = 1,
>  	.has_late_bind = true,
>  	.has_sriov = true,
> +	.has_sriov_vf_migration = true,
>  	.max_gt_per_tile = 2,
>  	.needs_scratch = true,
>  	.subplatforms = (const struct xe_subplatform_desc[]) {
> @@ -378,6 +379,7 @@ static const struct xe_device_desc ptl_desc = {
>  	.has_display = true,
>  	.has_flat_ccs = 1,
>  	.has_sriov = true,
> +	.has_sriov_vf_migration = true,
>  	.max_gt_per_tile = 2,
>  	.needs_scratch = true,
>  	.needs_shared_vf_gt_wq = true,
> @@ -657,6 +659,7 @@ static int xe_info_init_early(struct xe_device *xe,
>  	xe->info.has_pxp = desc->has_pxp;
>  	xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) &&
>  		desc->has_sriov;
> +	xe->info.has_sriov_vf_migration = desc->has_sriov_vf_migration;
>  	xe->info.skip_guc_pc = desc->skip_guc_pc;
>  	xe->info.skip_mtcfg = desc->skip_mtcfg;
>  	xe->info.skip_pcode = desc->skip_pcode;
> @@ -1020,9 +1023,10 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>  		xe_step_name(xe->info.step.media),
>  		xe_step_name(xe->info.step.basedie));
>  
> -	drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s)\n",
> +	drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s) (VF migration: %s)\n",
>  		str_yes_no(xe_device_has_sriov(xe)),
> -		xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
> +		xe_sriov_mode_to_string(xe_device_sriov_mode(xe)),
> +		str_yes_no(xe_device_has_sriov_vf_migration(xe)));
>  
>  	err = xe_pm_init_early(xe);
>  	if (err)
> diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
> index a4451bdc79fb3..40f158b3ac890 100644
> --- a/drivers/gpu/drm/xe/xe_pci_types.h
> +++ b/drivers/gpu/drm/xe/xe_pci_types.h
> @@ -48,6 +48,7 @@ struct xe_device_desc {
>  	u8 has_mbx_power_limits:1;
>  	u8 has_pxp:1;
>  	u8 has_sriov:1;
> +	u8 has_sriov_vf_migration:1;
>  	u8 needs_scratch:1;
>  	u8 skip_guc_pc:1;
>  	u8 skip_mtcfg:1;
> diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
> index 88babec9c893e..a6cf3b57edba1 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
> @@ -50,7 +50,9 @@ bool xe_sriov_pf_migration_supported(struct xe_device *xe)
>  
>  static bool pf_check_migration_support(struct xe_device *xe)
>  {
> -	/* XXX: for now this is for feature enabling only */
> +	if (xe_device_has_sriov_vf_migration(xe))
> +		return true;

but from the PF POV, are there any differences in migration between platforms which already have .has_sriov flag?

and on the VF side we decided just to rely on the xe_has_memirq() flag, maybe we can do the same here on the PF side?

note that all pre-PTL platforms require the .force_probe flag anyway,
and that's why we also enabled the unconditional .has_sriov flag for them


btw, IIRC we also should check for min GuC version on PTL for proper CCS migration,
IMO the PF shall reject VF migration on older GuC

> +
>  	return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
>  }
>