[RFC v2 PATCH 06/10] vfio/pci: Remove vfio_pci_zap_bars()

Matt Evans posted 10 patches 3 weeks, 4 days ago
[RFC v2 PATCH 06/10] vfio/pci: Remove vfio_pci_zap_bars()
Posted by Matt Evans 3 weeks, 4 days ago
vfio_pci_zap_bars() and the wrapper
vfio_pci_zap_and_down_write_memory_lock() are redundant as of
"vfio/pci: Convert BAR mmap() to use a DMABUF".  The DMABUFs used for
BAR mappings already zap PTEs via the existing
vfio_pci_dma_buf_move(), which notifies changes to the BAR space
(e.g. around reset).

Remove the old functions, and the various points needing to zap BARs
become slightly cleaner.

Signed-off-by: Matt Evans <mattev@meta.com>
---
 drivers/vfio/pci/vfio_pci_config.c | 18 ++++++------------
 drivers/vfio/pci/vfio_pci_core.c   | 30 +++++++-----------------------
 drivers/vfio/pci/vfio_pci_priv.h   |  1 -
 3 files changed, 13 insertions(+), 36 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index b4e39253f98d..c7ed28be1104 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -590,12 +590,9 @@ static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,
 		virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY);
 		new_mem = !!(new_cmd & PCI_COMMAND_MEMORY);
 
-		if (!new_mem) {
-			vfio_pci_zap_and_down_write_memory_lock(vdev);
+		down_write(&vdev->memory_lock);
+		if (!new_mem)
 			vfio_pci_dma_buf_move(vdev, true);
-		} else {
-			down_write(&vdev->memory_lock);
-		}
 
 		/*
 		 * If the user is writing mem/io enable (new_mem/io) and we
@@ -712,12 +709,9 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
 static void vfio_lock_and_set_power_state(struct vfio_pci_core_device *vdev,
 					  pci_power_t state)
 {
-	if (state >= PCI_D3hot) {
-		vfio_pci_zap_and_down_write_memory_lock(vdev);
+	down_write(&vdev->memory_lock);
+	if (state >= PCI_D3hot)
 		vfio_pci_dma_buf_move(vdev, true);
-	} else {
-		down_write(&vdev->memory_lock);
-	}
 
 	vfio_pci_set_power_state(vdev, state);
 	if (__vfio_pci_memory_enabled(vdev))
@@ -908,7 +902,7 @@ static int vfio_exp_config_write(struct vfio_pci_core_device *vdev, int pos,
 						 &cap);
 
 		if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
-			vfio_pci_zap_and_down_write_memory_lock(vdev);
+			down_write(&vdev->memory_lock);
 			vfio_pci_dma_buf_move(vdev, true);
 			pci_try_reset_function(vdev->pdev);
 			if (__vfio_pci_memory_enabled(vdev))
@@ -993,7 +987,7 @@ static int vfio_af_config_write(struct vfio_pci_core_device *vdev, int pos,
 						&cap);
 
 		if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
-			vfio_pci_zap_and_down_write_memory_lock(vdev);
+			down_write(&vdev->memory_lock);
 			vfio_pci_dma_buf_move(vdev, true);
 			pci_try_reset_function(vdev->pdev);
 			if (__vfio_pci_memory_enabled(vdev))
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 41224efa58d8..9e9ad97c2f7f 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -319,7 +319,7 @@ static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev,
 	 * The vdev power related flags are protected with 'memory_lock'
 	 * semaphore.
 	 */
-	vfio_pci_zap_and_down_write_memory_lock(vdev);
+	down_write(&vdev->memory_lock);
 	vfio_pci_dma_buf_move(vdev, true);
 
 	if (vdev->pm_runtime_engaged) {
@@ -1229,7 +1229,7 @@ static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev,
 	if (!vdev->reset_works)
 		return -EINVAL;
 
-	vfio_pci_zap_and_down_write_memory_lock(vdev);
+	down_write(&vdev->memory_lock);
 
 	/*
 	 * This function can be invoked while the power state is non-D0. If
@@ -1613,22 +1613,6 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_write);
 
-static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev)
-{
-	struct vfio_device *core_vdev = &vdev->vdev;
-	loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX);
-	loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX);
-	loff_t len = end - start;
-
-	unmap_mapping_range(core_vdev->inode->i_mapping, start, len, true);
-}
-
-void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev)
-{
-	down_write(&vdev->memory_lock);
-	vfio_pci_zap_bars(vdev);
-}
-
 u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev)
 {
 	u16 cmd;
@@ -2487,10 +2471,11 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
 		}
 
 		/*
-		 * Take the memory write lock for each device and zap BAR
-		 * mappings to prevent the user accessing the device while in
-		 * reset.  Locking multiple devices is prone to deadlock,
-		 * runaway and unwind if we hit contention.
+		 * Take the memory write lock for each device and
+		 * revoke all DMABUFs, which will prevent any access
+		 * to the device while in reset.  Locking multiple
+		 * devices is prone to deadlock, runaway and unwind if
+		 * we hit contention.
 		 */
 		if (!down_write_trylock(&vdev->memory_lock)) {
 			ret = -EBUSY;
@@ -2498,7 +2483,6 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
 		}
 
 		vfio_pci_dma_buf_move(vdev, true);
-		vfio_pci_zap_bars(vdev);
 	}
 
 	if (!list_entry_is_head(vdev,
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index 37ece9b4b5bd..e201c96bbb14 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -78,7 +78,6 @@ void vfio_config_free(struct vfio_pci_core_device *vdev);
 int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev,
 			     pci_power_t state);
 
-void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev);
 u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev);
 void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
 					u16 cmd);
-- 
2.47.3
Re: [RFC v2 PATCH 06/10] vfio/pci: Remove vfio_pci_zap_bars()
Posted by Christian König 3 weeks, 4 days ago
On 3/12/26 19:46, Matt Evans wrote:
> vfio_pci_zap_bars() and the wrapper
> vfio_pci_zap_and_down_write_memory_lock() are redundant as of
> "vfio/pci: Convert BAR mmap() to use a DMABUF".  The DMABUFs used for
> BAR mappings already zap PTEs via the existing
> vfio_pci_dma_buf_move(), which notifies changes to the BAR space
> (e.g. around reset).
> 
> Remove the old functions, and the various points needing to zap BARs
> become slightly cleaner.

Not a full review, but it looks like you now take the DMA-buf reservation lock while holding vdev->memory_lock.

I strongly recommend enabling lockdep while testing that, just to be on the safe side and confirm that all locks are taken in a consistent order.

Regards,
Christian.

> 
> Signed-off-by: Matt Evans <mattev@meta.com>
> ---
>  drivers/vfio/pci/vfio_pci_config.c | 18 ++++++------------
>  drivers/vfio/pci/vfio_pci_core.c   | 30 +++++++-----------------------
>  drivers/vfio/pci/vfio_pci_priv.h   |  1 -
>  3 files changed, 13 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
> index b4e39253f98d..c7ed28be1104 100644
> --- a/drivers/vfio/pci/vfio_pci_config.c
> +++ b/drivers/vfio/pci/vfio_pci_config.c
> @@ -590,12 +590,9 @@ static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,
>  		virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY);
>  		new_mem = !!(new_cmd & PCI_COMMAND_MEMORY);
>  
> -		if (!new_mem) {
> -			vfio_pci_zap_and_down_write_memory_lock(vdev);
> +		down_write(&vdev->memory_lock);
> +		if (!new_mem)
>  			vfio_pci_dma_buf_move(vdev, true);
> -		} else {
> -			down_write(&vdev->memory_lock);
> -		}
>  
>  		/*
>  		 * If the user is writing mem/io enable (new_mem/io) and we
> @@ -712,12 +709,9 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
>  static void vfio_lock_and_set_power_state(struct vfio_pci_core_device *vdev,
>  					  pci_power_t state)
>  {
> -	if (state >= PCI_D3hot) {
> -		vfio_pci_zap_and_down_write_memory_lock(vdev);
> +	down_write(&vdev->memory_lock);
> +	if (state >= PCI_D3hot)
>  		vfio_pci_dma_buf_move(vdev, true);
> -	} else {
> -		down_write(&vdev->memory_lock);
> -	}
>  
>  	vfio_pci_set_power_state(vdev, state);
>  	if (__vfio_pci_memory_enabled(vdev))
> @@ -908,7 +902,7 @@ static int vfio_exp_config_write(struct vfio_pci_core_device *vdev, int pos,
>  						 &cap);
>  
>  		if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
> -			vfio_pci_zap_and_down_write_memory_lock(vdev);
> +			down_write(&vdev->memory_lock);
>  			vfio_pci_dma_buf_move(vdev, true);
>  			pci_try_reset_function(vdev->pdev);
>  			if (__vfio_pci_memory_enabled(vdev))
> @@ -993,7 +987,7 @@ static int vfio_af_config_write(struct vfio_pci_core_device *vdev, int pos,
>  						&cap);
>  
>  		if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
> -			vfio_pci_zap_and_down_write_memory_lock(vdev);
> +			down_write(&vdev->memory_lock);
>  			vfio_pci_dma_buf_move(vdev, true);
>  			pci_try_reset_function(vdev->pdev);
>  			if (__vfio_pci_memory_enabled(vdev))
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 41224efa58d8..9e9ad97c2f7f 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -319,7 +319,7 @@ static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev,
>  	 * The vdev power related flags are protected with 'memory_lock'
>  	 * semaphore.
>  	 */
> -	vfio_pci_zap_and_down_write_memory_lock(vdev);
> +	down_write(&vdev->memory_lock);
>  	vfio_pci_dma_buf_move(vdev, true);
>  
>  	if (vdev->pm_runtime_engaged) {
> @@ -1229,7 +1229,7 @@ static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev,
>  	if (!vdev->reset_works)
>  		return -EINVAL;
>  
> -	vfio_pci_zap_and_down_write_memory_lock(vdev);
> +	down_write(&vdev->memory_lock);
>  
>  	/*
>  	 * This function can be invoked while the power state is non-D0. If
> @@ -1613,22 +1613,6 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu
>  }
>  EXPORT_SYMBOL_GPL(vfio_pci_core_write);
>  
> -static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev)
> -{
> -	struct vfio_device *core_vdev = &vdev->vdev;
> -	loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX);
> -	loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX);
> -	loff_t len = end - start;
> -
> -	unmap_mapping_range(core_vdev->inode->i_mapping, start, len, true);
> -}
> -
> -void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev)
> -{
> -	down_write(&vdev->memory_lock);
> -	vfio_pci_zap_bars(vdev);
> -}
> -
>  u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev)
>  {
>  	u16 cmd;
> @@ -2487,10 +2471,11 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>  		}
>  
>  		/*
> -		 * Take the memory write lock for each device and zap BAR
> -		 * mappings to prevent the user accessing the device while in
> -		 * reset.  Locking multiple devices is prone to deadlock,
> -		 * runaway and unwind if we hit contention.
> +		 * Take the memory write lock for each device and
> +		 * revoke all DMABUFs, which will prevent any access
> +		 * to the device while in reset.  Locking multiple
> +		 * devices is prone to deadlock, runaway and unwind if
> +		 * we hit contention.
>  		 */
>  		if (!down_write_trylock(&vdev->memory_lock)) {
>  			ret = -EBUSY;
> @@ -2498,7 +2483,6 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
>  		}
>  
>  		vfio_pci_dma_buf_move(vdev, true);
> -		vfio_pci_zap_bars(vdev);
>  	}
>  
>  	if (!list_entry_is_head(vdev,
> diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
> index 37ece9b4b5bd..e201c96bbb14 100644
> --- a/drivers/vfio/pci/vfio_pci_priv.h
> +++ b/drivers/vfio/pci/vfio_pci_priv.h
> @@ -78,7 +78,6 @@ void vfio_config_free(struct vfio_pci_core_device *vdev);
>  int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev,
>  			     pci_power_t state);
>  
> -void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev);
>  u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev);
>  void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
>  					u16 cmd);