[PATCH RFC v2 10/15] vfio/nvgrace-egm: Clear Memory before handing out to VM

ankita@nvidia.com posted 15 patches 1 month, 1 week ago
[PATCH RFC v2 10/15] vfio/nvgrace-egm: Clear Memory before handing out to VM
Posted by ankita@nvidia.com 1 month, 1 week ago
From: Ankit Agrawal <ankita@nvidia.com>

The EGM region is invisible to the host Linux kernel, which does not
manage it. The EGM module manages the EGM memory and is thus
responsible for clearing the region before handing it out to the VM.

Clear the EGM region on EGM chardev open. To avoid CPU lockup logs,
zap the region in 1G chunks.

Suggested-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
---
 drivers/vfio/pci/nvgrace-gpu/egm.c | 43 ++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/drivers/vfio/pci/nvgrace-gpu/egm.c b/drivers/vfio/pci/nvgrace-gpu/egm.c
index 5786ebe374a5..de7771a4145d 100644
--- a/drivers/vfio/pci/nvgrace-gpu/egm.c
+++ b/drivers/vfio/pci/nvgrace-gpu/egm.c
@@ -15,6 +15,7 @@ static DEFINE_XARRAY(egm_chardevs);
 struct chardev {
 	struct device device;
 	struct cdev cdev;
+	atomic_t open_count;
 };
 
 static struct nvgrace_egm_dev *
@@ -30,6 +31,42 @@ static int nvgrace_egm_open(struct inode *inode, struct file *file)
 {
 	struct chardev *egm_chardev =
 		container_of(inode->i_cdev, struct chardev, cdev);
+	struct nvgrace_egm_dev *egm_dev =
+		egm_chardev_to_nvgrace_egm_dev(egm_chardev);
+	void *memaddr;
+
+	if (atomic_cmpxchg(&egm_chardev->open_count, 0, 1) != 0)
+		return -EBUSY;
+
+	/*
+	 * nvgrace-egm module is responsible to manage the EGM memory as
+	 * the host kernel has no knowledge of it. Clear the region before
+	 * handing over to userspace.
+	 */
+	memaddr = memremap(egm_dev->egmphys, egm_dev->egmlength, MEMREMAP_WB);
+	if (!memaddr) {
+		atomic_dec(&egm_chardev->open_count);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Clear in chunks of 1G to avoid CPU lockup logs.
+	 */
+	{
+		size_t remaining = egm_dev->egmlength;
+		u8 *chunk_addr = (u8 *)memaddr;
+		size_t chunk_size;
+
+		while (remaining > 0) {
+			chunk_size = min(remaining, SZ_1G);
+			memset(chunk_addr, 0, chunk_size);
+			cond_resched();
+			chunk_addr += chunk_size;
+			remaining -= chunk_size;
+		}
+	}
+
+	memunmap(memaddr);
 
 	file->private_data = egm_chardev;
 
@@ -38,8 +75,13 @@ static int nvgrace_egm_open(struct inode *inode, struct file *file)
 
 static int nvgrace_egm_release(struct inode *inode, struct file *file)
 {
+	struct chardev *egm_chardev =
+		container_of(inode->i_cdev, struct chardev, cdev);
+
 	file->private_data = NULL;
 
+	atomic_dec(&egm_chardev->open_count);
+
 	return 0;
 }
 
@@ -108,6 +150,7 @@ setup_egm_chardev(struct nvgrace_egm_dev *egm_dev)
 	egm_chardev->device.parent = &egm_dev->aux_dev.dev;
 	cdev_init(&egm_chardev->cdev, &file_ops);
 	egm_chardev->cdev.owner = THIS_MODULE;
+	atomic_set(&egm_chardev->open_count, 0);
 
 	ret = dev_set_name(&egm_chardev->device, "egm%lld", egm_dev->egmpxm);
 	if (ret)
-- 
2.34.1
Re: [PATCH RFC v2 10/15] vfio/nvgrace-egm: Clear Memory before handing out to VM
Posted by Alex Williamson 1 month ago
On Mon, 23 Feb 2026 15:55:09 +0000
<ankita@nvidia.com> wrote:

> From: Ankit Agrawal <ankita@nvidia.com>
> 
> The EGM region is invisible to the host Linux kernel and it does not
> manage the region. The EGM module manages the EGM memory and thus is
> responsible to clear out the region before handing out to the VM.
> 
> Clear EGM region on EGM chardev open. To avoid CPU lockup logs,
> zap the region in 1G chunks.
> 
> Suggested-by: Vikram Sethi <vsethi@nvidia.com>
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
> ---
>  drivers/vfio/pci/nvgrace-gpu/egm.c | 43 ++++++++++++++++++++++++++++++
>  1 file changed, 43 insertions(+)
> 
> diff --git a/drivers/vfio/pci/nvgrace-gpu/egm.c b/drivers/vfio/pci/nvgrace-gpu/egm.c
> index 5786ebe374a5..de7771a4145d 100644
> --- a/drivers/vfio/pci/nvgrace-gpu/egm.c
> +++ b/drivers/vfio/pci/nvgrace-gpu/egm.c
> @@ -15,6 +15,7 @@ static DEFINE_XARRAY(egm_chardevs);
>  struct chardev {
>  	struct device device;
>  	struct cdev cdev;
> +	atomic_t open_count;
>  };
>  
>  static struct nvgrace_egm_dev *
> @@ -30,6 +31,42 @@ static int nvgrace_egm_open(struct inode *inode, struct file *file)
>  {
>  	struct chardev *egm_chardev =
>  		container_of(inode->i_cdev, struct chardev, cdev);
> +	struct nvgrace_egm_dev *egm_dev =
> +		egm_chardev_to_nvgrace_egm_dev(egm_chardev);
> +	void *memaddr;
> +
> +	if (atomic_cmpxchg(&egm_chardev->open_count, 0, 1) != 0)
> +		return -EBUSY;
> +
> +	/*
> +	 * nvgrace-egm module is responsible to manage the EGM memory as
> +	 * the host kernel has no knowledge of it. Clear the region before
> +	 * handing over to userspace.
> +	 */
> +	memaddr = memremap(egm_dev->egmphys, egm_dev->egmlength, MEMREMAP_WB);
> +	if (!memaddr) {
> +		atomic_dec(&egm_chardev->open_count);
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * Clear in chunks of 1G to avoid CPU lockup logs.
> +	 */
> +	{
> +		size_t remaining = egm_dev->egmlength;
> +		u8 *chunk_addr = (u8 *)memaddr;
> +		size_t chunk_size;

Declare at the start of the function and remove this scope hack.

> +
> +		while (remaining > 0) {
> +			chunk_size = min(remaining, SZ_1G);

Use min_t(size_t, remaining, SZ_1G) here.

> +			memset(chunk_addr, 0, chunk_size);
> +			cond_resched();
> +			chunk_addr += chunk_size;
> +			remaining -= chunk_size;
> +		}
> +	}

Aren't we going to want to do this asynchronously or run multiple
threads to avoid stalling VM launch? 

> +
> +	memunmap(memaddr);
>  
>  	file->private_data = egm_chardev;
>  
> @@ -38,8 +75,13 @@ static int nvgrace_egm_open(struct inode *inode, struct file *file)
>  
>  static int nvgrace_egm_release(struct inode *inode, struct file *file)
>  {
> +	struct chardev *egm_chardev =
> +		container_of(inode->i_cdev, struct chardev, cdev);
> +
>  	file->private_data = NULL;
>  
> +	atomic_dec(&egm_chardev->open_count);
> +
>  	return 0;
>  }
>  
> @@ -108,6 +150,7 @@ setup_egm_chardev(struct nvgrace_egm_dev *egm_dev)
>  	egm_chardev->device.parent = &egm_dev->aux_dev.dev;
>  	cdev_init(&egm_chardev->cdev, &file_ops);
>  	egm_chardev->cdev.owner = THIS_MODULE;
> +	atomic_set(&egm_chardev->open_count, 0);

Already zero from kzalloc.  Thanks,

Alex

>  
>  	ret = dev_set_name(&egm_chardev->device, "egm%lld", egm_dev->egmpxm);
>  	if (ret)