[PATCH v13 04/12] KVM: guest_memfd: Add slab-allocated inode cache

Sean Christopherson posted 12 patches 3 months, 3 weeks ago
[PATCH v13 04/12] KVM: guest_memfd: Add slab-allocated inode cache
Posted by Sean Christopherson 3 months, 3 weeks ago
From: Shivank Garg <shivankg@amd.com>

Add a dedicated gmem_inode structure and a slab-allocated inode cache for
guest memory backing, similar to how shmem handles inodes.

This adds the necessary allocation/destruction functions and prepares
for upcoming guest_memfd NUMA policy support changes.  Using a dedicated
structure will also allow for additional cleanups, e.g. to track flags in
gmem_inode instead of i_private.

Signed-off-by: Shivank Garg <shivankg@amd.com>
Tested-by: Ashish Kalra <ashish.kalra@amd.com>
[sean: s/kvm_gmem_inode_info/gmem_inode, name init_once()]
Reviewed-by: Ackerley Tng <ackerleytng@google.com>
Tested-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 virt/kvm/guest_memfd.c | 77 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index ce04fc85e631..88fd812f0f31 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -26,6 +26,15 @@ struct gmem_file {
 	struct list_head entry;
 };
 
+struct gmem_inode {
+	struct inode vfs_inode;
+};
+
+static __always_inline struct gmem_inode *GMEM_I(struct inode *inode)
+{
+	return container_of(inode, struct gmem_inode, vfs_inode);
+}
+
 #define kvm_gmem_for_each_file(f, mapping) \
 	list_for_each_entry(f, &(mapping)->i_private_list, entry)
 
@@ -830,13 +839,61 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_populate);
 #endif
 
+static struct kmem_cache *kvm_gmem_inode_cachep;
+
+static void kvm_gmem_init_inode_once(void *__gi)
+{
+	struct gmem_inode *gi = __gi;
+
+	/*
+	 * Note!  Don't initialize the inode with anything specific to the
+	 * guest_memfd instance, or that might be specific to how the inode is
+	 * used (from the VFS-layer's perspective).  This hook is called only
+	 * during the initial slab allocation, i.e. only fields/state that are
+	 * idempotent across _all_ use of the inode _object_ can be initialized
+	 * at this time!
+	 */
+	inode_init_once(&gi->vfs_inode);
+}
+
+static struct inode *kvm_gmem_alloc_inode(struct super_block *sb)
+{
+	struct gmem_inode *gi;
+
+	gi = alloc_inode_sb(sb, kvm_gmem_inode_cachep, GFP_KERNEL);
+	if (!gi)
+		return NULL;
+
+	return &gi->vfs_inode;
+}
+
+static void kvm_gmem_destroy_inode(struct inode *inode)
+{
+}
+
+static void kvm_gmem_free_inode(struct inode *inode)
+{
+	kmem_cache_free(kvm_gmem_inode_cachep, GMEM_I(inode));
+}
+
+static const struct super_operations kvm_gmem_super_operations = {
+	.statfs		= simple_statfs,
+	.alloc_inode	= kvm_gmem_alloc_inode,
+	.destroy_inode	= kvm_gmem_destroy_inode,
+	.free_inode	= kvm_gmem_free_inode,
+};
+
 static int kvm_gmem_init_fs_context(struct fs_context *fc)
 {
+	struct pseudo_fs_context *ctx;
+
 	if (!init_pseudo(fc, GUEST_MEMFD_MAGIC))
 		return -ENOMEM;
 
 	fc->s_iflags |= SB_I_NOEXEC;
 	fc->s_iflags |= SB_I_NODEV;
+	ctx = fc->fs_private;
+	ctx->ops = &kvm_gmem_super_operations;
 
 	return 0;
 }
@@ -860,13 +917,31 @@ static int kvm_gmem_init_mount(void)
 
 int kvm_gmem_init(struct module *module)
 {
+	struct kmem_cache_args args = {
+		.align = 0,
+		.ctor = kvm_gmem_init_inode_once,
+	};
+	int ret;
+
 	kvm_gmem_fops.owner = module;
+	kvm_gmem_inode_cachep = kmem_cache_create("kvm_gmem_inode_cache",
+						  sizeof(struct gmem_inode),
+						  &args, SLAB_ACCOUNT);
+	if (!kvm_gmem_inode_cachep)
+		return -ENOMEM;
 
-	return kvm_gmem_init_mount();
+	ret = kvm_gmem_init_mount();
+	if (ret) {
+		kmem_cache_destroy(kvm_gmem_inode_cachep);
+		return ret;
+	}
+	return 0;
 }
 
 void kvm_gmem_exit(void)
 {
 	kern_unmount(kvm_gmem_mnt);
 	kvm_gmem_mnt = NULL;
+	rcu_barrier();
+	kmem_cache_destroy(kvm_gmem_inode_cachep);
 }
-- 
2.51.0.858.gf9c4a03a3a-goog
Re: [PATCH v13 04/12] KVM: guest_memfd: Add slab-allocated inode cache
Posted by Vlastimil Babka 3 months, 2 weeks ago
On 10/16/25 19:28, Sean Christopherson wrote:
> From: Shivank Garg <shivankg@amd.com>
> 
> Add a dedicated gmem_inode structure and a slab-allocated inode cache for
> guest memory backing, similar to how shmem handles inodes.
> 
> This adds the necessary allocation/destruction functions and prepares
> for upcoming guest_memfd NUMA policy support changes.  Using a dedicated
> structure will also allow for additional cleanups, e.g. to track flags in
> gmem_inode instead of i_private.
> 
> Signed-off-by: Shivank Garg <shivankg@amd.com>
> Tested-by: Ashish Kalra <ashish.kalra@amd.com>
> [sean: s/kvm_gmem_inode_info/gmem_inode, name init_once()]
> Reviewed-by: Ackerley Tng <ackerleytng@google.com>
> Tested-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

Some nits below, not critical unless there's resubmit for other reasons:

> @@ -860,13 +917,31 @@ static int kvm_gmem_init_mount(void)
>  
>  int kvm_gmem_init(struct module *module)
>  {
> +	struct kmem_cache_args args = {
> +		.align = 0,

This seems unnecessary as it's implicit.

> +		.ctor = kvm_gmem_init_inode_once,
> +	};
> +	int ret;
> +
>  	kvm_gmem_fops.owner = module;
> +	kvm_gmem_inode_cachep = kmem_cache_create("kvm_gmem_inode_cache",
> +						  sizeof(struct gmem_inode),
> +						  &args, SLAB_ACCOUNT);
> +	if (!kvm_gmem_inode_cachep)
> +		return -ENOMEM;
>  
> -	return kvm_gmem_init_mount();
> +	ret = kvm_gmem_init_mount();
> +	if (ret) {
> +		kmem_cache_destroy(kvm_gmem_inode_cachep);
> +		return ret;
> +	}
> +	return 0;
>  }
>  
>  void kvm_gmem_exit(void)
>  {
>  	kern_unmount(kvm_gmem_mnt);
>  	kvm_gmem_mnt = NULL;
> +	rcu_barrier();

Is it because VFS can do call_rcu() with something that ends up with
kvm_gmem_free_inode()? Because nothing in this patch does that directly,
maybe worth a comment?

> +	kmem_cache_destroy(kvm_gmem_inode_cachep);
>  }
Re: [PATCH v13 04/12] KVM: guest_memfd: Add slab-allocated inode cache
Posted by Garg, Shivank 3 months, 2 weeks ago

On 10/27/2025 4:36 PM, Vlastimil Babka wrote:
> On 10/16/25 19:28, Sean Christopherson wrote:
>> From: Shivank Garg <shivankg@amd.com>
>>
>> Add a dedicated gmem_inode structure and a slab-allocated inode cache for
>> guest memory backing, similar to how shmem handles inodes.
>>
>> This adds the necessary allocation/destruction functions and prepares
>> for upcoming guest_memfd NUMA policy support changes.  Using a dedicated
>> structure will also allow for additional cleanups, e.g. to track flags in
>> gmem_inode instead of i_private.
>>
>> Signed-off-by: Shivank Garg <shivankg@amd.com>
>> Tested-by: Ashish Kalra <ashish.kalra@amd.com>
>> [sean: s/kvm_gmem_inode_info/gmem_inode, name init_once()]
>> Reviewed-by: Ackerley Tng <ackerleytng@google.com>
>> Tested-by: Ackerley Tng <ackerleytng@google.com>
>> Signed-off-by: Sean Christopherson <seanjc@google.com>
> 
> Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
> 
> Some nits below, not critical unless there's resubmit for other reasons:

Hi Vlastimil,

Thank you for the review.

> 
>> @@ -860,13 +917,31 @@ static int kvm_gmem_init_mount(void)
>>  
>>  int kvm_gmem_init(struct module *module)
>>  {
>> +	struct kmem_cache_args args = {
>> +		.align = 0,
> 
> This seems unnecessary as it's implicit.

Ack

>> +		.ctor = kvm_gmem_init_inode_once,
>> +	};
>> +	int ret;
>> +
>>  	kvm_gmem_fops.owner = module;
>> +	kvm_gmem_inode_cachep = kmem_cache_create("kvm_gmem_inode_cache",
>> +						  sizeof(struct gmem_inode),
>> +						  &args, SLAB_ACCOUNT);
>> +	if (!kvm_gmem_inode_cachep)
>> +		return -ENOMEM;
>>  
>> -	return kvm_gmem_init_mount();
>> +	ret = kvm_gmem_init_mount();
>> +	if (ret) {
>> +		kmem_cache_destroy(kvm_gmem_inode_cachep);
>> +		return ret;
>> +	}
>> +	return 0;
>>  }
>>  
>>  void kvm_gmem_exit(void)
>>  {
>>  	kern_unmount(kvm_gmem_mnt);
>>  	kvm_gmem_mnt = NULL;
>> +	rcu_barrier();
> 
> Is it because VFS can do call_rcu() with something that ends up with
> kvm_gmem_free_inode()? Because nothing in this patch does that directly,
> maybe worth a comment?

Yes, exactly. I discovered this race condition while debugging a bug that 
occurred during kvm_amd module unload after running gmem backed VM.

More details here:
https://lore.kernel.org/linux-mm/e7f7703d-fe76-4ab2-bef4-8d4c54da03ad@amd.com


diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 427c0acee9d7..e1f69747fc84 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -969,7 +969,6 @@ static int kvm_gmem_init_mount(void)
 int kvm_gmem_init(struct module *module)
 {
 	struct kmem_cache_args args = {
-		.align = 0,
 		.ctor = kvm_gmem_init_inode_once,
 	};
 	int ret;
@@ -993,6 +992,15 @@ void kvm_gmem_exit(void)
 {
 	kern_unmount(kvm_gmem_mnt);
 	kvm_gmem_mnt = NULL;
+
+	/*
+	 * Wait for all pending RCU callbacks to complete before destroying
+	 * the inode cache. The VFS layer use call_rcu() during inode
+	 * eviction (via evict_inodes() -> destroy_inode() -> call_rcu()),
+	 * which eventually calls kvm_gmem_free_inode().
+	 * We must ensure all such callbacks have finished before
+	 * kmem_cache_destroy() to avoid issues with the kmem cache.
+	 */
 	rcu_barrier();
 	kmem_cache_destroy(kvm_gmem_inode_cachep);
 }


> 
>> +	kmem_cache_destroy(kvm_gmem_inode_cachep);
>>  }
>