[PATCH v6 12/20] mm: shmem: allow freezing inode mapping

Pasha Tatashin posted 20 patches 2 months, 3 weeks ago
There is a newer version of this series
[PATCH v6 12/20] mm: shmem: allow freezing inode mapping
Posted by Pasha Tatashin 2 months, 3 weeks ago
From: Pratyush Yadav <ptyadav@amazon.de>

To prepare a shmem inode for live update via the Live Update
Orchestrator (LUO), its index -> folio mappings must be serialized. Once
the mappings are serialized, they must not change, since any change
would make the serialized data inconsistent. The mappings can be kept
fixed by pinning the folios to avoid migration, and by making sure no
folios can be added to or removed from the inode.

While mechanisms to pin folios already exist, the only way to stop
folios from being added or removed is the grow and shrink file seals.
But file seals come with their own semantics, one of which is that they
can't be removed. This doesn't work with liveupdate, since a live update
can be cancelled or can error out, in which case the seals would need to
be removed and the file's normal functionality restored.

Introduce SHMEM_F_MAPPING_FROZEN to mark an inode's mapping as frozen
instead. It is internal to shmem and is not directly exposed to
userspace. It functions similarly to F_SEAL_GROW | F_SEAL_SHRINK, but
additionally disallows hole punching, and can be removed.

Signed-off-by: Pratyush Yadav <ptyadav@amazon.de>
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
 include/linux/shmem_fs.h | 17 +++++++++++++++++
 mm/shmem.c               | 12 +++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 650874b400b5..a9f5db472a39 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -24,6 +24,14 @@ struct swap_iocb;
 #define SHMEM_F_NORESERVE	BIT(0)
 /* Disallow swapping. */
 #define SHMEM_F_LOCKED		BIT(1)
+/*
+ * Disallow growing, shrinking, or hole punching in the inode. Combined with
+ * folio pinning, makes sure the inode's mapping stays fixed.
+ *
+ * In some ways similar to F_SEAL_GROW | F_SEAL_SHRINK, but can be removed and
+ * isn't directly visible to userspace.
+ */
+#define SHMEM_F_MAPPING_FROZEN	BIT(2)
 
 struct shmem_inode_info {
 	spinlock_t		lock;
@@ -186,6 +194,15 @@ static inline bool shmem_file(struct file *file)
 	return shmem_mapping(file->f_mapping);
 }
 
+/* Must be called with inode lock taken exclusive. */
+static inline void shmem_i_mapping_freeze(struct inode *inode, bool freeze)
+{
+	if (freeze)
+		SHMEM_I(inode)->flags |= SHMEM_F_MAPPING_FROZEN;
+	else
+		SHMEM_I(inode)->flags &= ~SHMEM_F_MAPPING_FROZEN;
+}
+
 /*
  * If fallocate(FALLOC_FL_KEEP_SIZE) has been used, there may be pages
  * beyond i_size's notion of EOF, which fallocate has committed to reserving:
diff --git a/mm/shmem.c b/mm/shmem.c
index 1d5036dec08a..05c3db840257 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1292,7 +1292,8 @@ static int shmem_setattr(struct mnt_idmap *idmap,
 		loff_t newsize = attr->ia_size;
 
 		/* protected by i_rwsem */
-		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
+		if ((info->flags & SHMEM_F_MAPPING_FROZEN) ||
+		    (newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
 		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
 			return -EPERM;
 
@@ -3289,6 +3290,10 @@ shmem_write_begin(const struct kiocb *iocb, struct address_space *mapping,
 			return -EPERM;
 	}
 
+	if (unlikely((info->flags & SHMEM_F_MAPPING_FROZEN) &&
+		     pos + len > inode->i_size))
+		return -EPERM;
+
 	ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
 	if (ret)
 		return ret;
@@ -3662,6 +3667,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 
 	inode_lock(inode);
 
+	if (info->flags & SHMEM_F_MAPPING_FROZEN) {
+		error = -EPERM;
+		goto out;
+	}
+
 	if (mode & FALLOC_FL_PUNCH_HOLE) {
 		struct address_space *mapping = file->f_mapping;
 		loff_t unmap_start = round_up(offset, PAGE_SIZE);
-- 
2.52.0.rc1.455.g30608eb744-goog
Re: [PATCH v6 12/20] mm: shmem: allow freezing inode mapping
Posted by Mike Rapoport 2 months, 3 weeks ago
On Sat, Nov 15, 2025 at 06:33:58PM -0500, Pasha Tatashin wrote:
> From: Pratyush Yadav <ptyadav@amazon.de>
> 
> To prepare a shmem inode for live update via the Live Update
> Orchestrator (LUO), its index -> folio mappings must be serialized. Once
> the mappings are serialized, they cannot change since it would cause the
> serialized data to become inconsistent. This can be done by pinning the
> folios to avoid migration, and by making sure no folios can be added to
> or removed from the inode.
> 
> While mechanisms to pin folios already exist, the only way to stop
> folios being added or removed are the grow and shrink file seals. But
> file seals come with their own semantics, one of which is that they
> can't be removed. This doesn't work with liveupdate since it can be
> cancelled or error out, which would need the seals to be removed and the
> file's normal functionality to be restored.
> 
> Introduce SHMEM_F_MAPPING_FROZEN to indicate this instead. It is
> internal to shmem and is not directly exposed to userspace. It functions
> similar to F_SEAL_GROW | F_SEAL_SHRINK, but additionally disallows hole
> punching, and can be removed.
> 
> Signed-off-by: Pratyush Yadav <ptyadav@amazon.de>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> ---
>  include/linux/shmem_fs.h | 17 +++++++++++++++++
>  mm/shmem.c               | 12 +++++++++++-
>  2 files changed, 28 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 650874b400b5..a9f5db472a39 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -24,6 +24,14 @@ struct swap_iocb;
>  #define SHMEM_F_NORESERVE	BIT(0)
>  /* Disallow swapping. */
>  #define SHMEM_F_LOCKED		BIT(1)
> +/*
> + * Disallow growing, shrinking, or hole punching in the inode. Combined with
> + * folio pinning, makes sure the inode's mapping stays fixed.
> + *
> + * In some ways similar to F_SEAL_GROW | F_SEAL_SHRINK, but can be removed and
> + * isn't directly visible to userspace.
> + */
> +#define SHMEM_F_MAPPING_FROZEN	BIT(2)
>  
>  struct shmem_inode_info {
>  	spinlock_t		lock;
> @@ -186,6 +194,15 @@ static inline bool shmem_file(struct file *file)
>  	return shmem_mapping(file->f_mapping);
>  }
>  
> +/* Must be called with inode lock taken exclusive. */
> +static inline void shmem_i_mapping_freeze(struct inode *inode, bool freeze)

_mapping usually refers to operations on struct address_space.
It seems that all shmem methods that take inode are just shmem_<operation>,
so shmem_freeze() looks more appropriate.

> +{
> +	if (freeze)
> +		SHMEM_I(inode)->flags |= SHMEM_F_MAPPING_FROZEN;
> +	else
> +		SHMEM_I(inode)->flags &= ~SHMEM_F_MAPPING_FROZEN;
> +}
> +
>  /*
>   * If fallocate(FALLOC_FL_KEEP_SIZE) has been used, there may be pages
>   * beyond i_size's notion of EOF, which fallocate has committed to reserving:
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 1d5036dec08a..05c3db840257 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1292,7 +1292,8 @@ static int shmem_setattr(struct mnt_idmap *idmap,
>  		loff_t newsize = attr->ia_size;
>  
>  		/* protected by i_rwsem */
> -		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
> +		if ((info->flags & SHMEM_F_MAPPING_FROZEN) ||

A corner case: if newsize == oldsize this will be a false positive

> +		    (newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
>  		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
>  			return -EPERM;
>  
> @@ -3289,6 +3290,10 @@ shmem_write_begin(const struct kiocb *iocb, struct address_space *mapping,
>  			return -EPERM;
>  	}
>  
> +	if (unlikely((info->flags & SHMEM_F_MAPPING_FROZEN) &&
> +		     pos + len > inode->i_size))
> +		return -EPERM;
> +
>  	ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
>  	if (ret)
>  		return ret;
> @@ -3662,6 +3667,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
>  
>  	inode_lock(inode);
>  
> +	if (info->flags & SHMEM_F_MAPPING_FROZEN) {
> +		error = -EPERM;
> +		goto out;
> +	}
> +
>  	if (mode & FALLOC_FL_PUNCH_HOLE) {
>  		struct address_space *mapping = file->f_mapping;
>  		loff_t unmap_start = round_up(offset, PAGE_SIZE);
> -- 
> 2.52.0.rc1.455.g30608eb744-goog
> 

-- 
Sincerely yours,
Mike.
Re: [PATCH v6 12/20] mm: shmem: allow freezing inode mapping
Posted by Pratyush Yadav 2 months, 2 weeks ago
On Mon, Nov 17 2025, Mike Rapoport wrote:

> On Sat, Nov 15, 2025 at 06:33:58PM -0500, Pasha Tatashin wrote:
>> From: Pratyush Yadav <ptyadav@amazon.de>
>> 
>> To prepare a shmem inode for live update via the Live Update
>> Orchestrator (LUO), its index -> folio mappings must be serialized. Once
>> the mappings are serialized, they cannot change since it would cause the
>> serialized data to become inconsistent. This can be done by pinning the
>> folios to avoid migration, and by making sure no folios can be added to
>> or removed from the inode.
>> 
>> While mechanisms to pin folios already exist, the only way to stop
>> folios being added or removed are the grow and shrink file seals. But
>> file seals come with their own semantics, one of which is that they
>> can't be removed. This doesn't work with liveupdate since it can be
>> cancelled or error out, which would need the seals to be removed and the
>> file's normal functionality to be restored.
>> 
>> Introduce SHMEM_F_MAPPING_FROZEN to indicate this instead. It is
>> internal to shmem and is not directly exposed to userspace. It functions
>> similar to F_SEAL_GROW | F_SEAL_SHRINK, but additionally disallows hole
>> punching, and can be removed.
>> 
>> Signed-off-by: Pratyush Yadav <ptyadav@amazon.de>
>> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
>> ---
[...]
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 1d5036dec08a..05c3db840257 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1292,7 +1292,8 @@ static int shmem_setattr(struct mnt_idmap *idmap,
>>  		loff_t newsize = attr->ia_size;
>>  
>>  		/* protected by i_rwsem */
>> -		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
>> +		if ((info->flags & SHMEM_F_MAPPING_FROZEN) ||
>
> A corner case: if newsize == oldsize this will be a false positive

Good catch. Though I wonder why anyone would do a truncate with the same
size.

>
>> +		    (newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
>>  		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
>>  			return -EPERM;
>>  
[...]

-- 
Regards,
Pratyush Yadav
Re: [PATCH v6 12/20] mm: shmem: allow freezing inode mapping
Posted by Pasha Tatashin 2 months, 3 weeks ago
> > +/* Must be called with inode lock taken exclusive. */
> > +static inline void shmem_i_mapping_freeze(struct inode *inode, bool freeze)
>
> _mapping usually refers to operations on struct address_space.
> It seems that all shmem methods that take inode are just shmem_<operation>,
> so shmem_freeze() looks more appropriate.

Done, renamed to shmem_freeze()

>
> > +{
> > +     if (freeze)
> > +             SHMEM_I(inode)->flags |= SHMEM_F_MAPPING_FROZEN;
> > +     else
> > +             SHMEM_I(inode)->flags &= ~SHMEM_F_MAPPING_FROZEN;
> > +}
> > +
> >  /*
> >   * If fallocate(FALLOC_FL_KEEP_SIZE) has been used, there may be pages
> >   * beyond i_size's notion of EOF, which fallocate has committed to reserving:
> > diff --git a/mm/shmem.c b/mm/shmem.c
> > index 1d5036dec08a..05c3db840257 100644
> > --- a/mm/shmem.c
> > +++ b/mm/shmem.c
> > @@ -1292,7 +1292,8 @@ static int shmem_setattr(struct mnt_idmap *idmap,
> >               loff_t newsize = attr->ia_size;
> >
> >               /* protected by i_rwsem */
> > -             if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
> > +             if ((info->flags & SHMEM_F_MAPPING_FROZEN) ||
>
> A corner case: if newsize == oldsize this will be a false positive

Added a fix.

Thanks,
Pasha

>
> > +                 (newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
> >                   (newsize > oldsize && (info->seals & F_SEAL_GROW)))
> >                       return -EPERM;
> >
> > @@ -3289,6 +3290,10 @@ shmem_write_begin(const struct kiocb *iocb, struct address_space *mapping,
> >                       return -EPERM;
> >       }
> >
> > +     if (unlikely((info->flags & SHMEM_F_MAPPING_FROZEN) &&
> > +                  pos + len > inode->i_size))
> > +             return -EPERM;
> > +
> >       ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
> >       if (ret)
> >               return ret;
> > @@ -3662,6 +3667,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
> >
> >       inode_lock(inode);
> >
> > +     if (info->flags & SHMEM_F_MAPPING_FROZEN) {
> > +             error = -EPERM;
> > +             goto out;
> > +     }
> > +
> >       if (mode & FALLOC_FL_PUNCH_HOLE) {
> >               struct address_space *mapping = file->f_mapping;
> >               loff_t unmap_start = round_up(offset, PAGE_SIZE);
> > --
> > 2.52.0.rc1.455.g30608eb744-goog
> >
>
> --
> Sincerely yours,
> Mike.