[PATCH v8 2/9] erofs: hold read context in iomap_iter if needed

Hongbo Li posted 9 patches 2 months, 3 weeks ago
There is a newer version of this series
[PATCH v8 2/9] erofs: hold read context in iomap_iter if needed
Posted by Hongbo Li 2 months, 3 weeks ago
Uncoming page cache sharing needs pass read context to iomap_iter,
here we unify the way of passing the read context in EROFS. Moreover,
bmap and fiemap don't need to map the inline data.

Note that we keep `struct page *` in `struct erofs_iomap_iter_ctx` as
well to avoid bogus kmap_to_page usage.

Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
---
 fs/erofs/data.c | 79 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 59 insertions(+), 20 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index bb13c4cb8455..bd3d85c61341 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -266,14 +266,23 @@ void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
 	folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
 }
 
+struct erofs_iomap_iter_ctx {
+	struct page *page;
+	void *base;
+};
+
 static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
 {
 	int ret;
+	struct erofs_iomap_iter_ctx *ctx;
 	struct super_block *sb = inode->i_sb;
 	struct erofs_map_blocks map;
 	struct erofs_map_dev mdev;
+	struct iomap_iter *iter;
 
+	iter = container_of(iomap, struct iomap_iter, iomap);
+	ctx = iter->private;
 	map.m_la = offset;
 	map.m_llen = length;
 	ret = erofs_map_blocks(inode, &map);
@@ -283,7 +292,8 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	iomap->offset = map.m_la;
 	iomap->length = map.m_llen;
 	iomap->flags = 0;
-	iomap->private = NULL;
+	if (ctx)
+		ctx->base = NULL;
 	iomap->addr = IOMAP_NULL_ADDR;
 	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
 		iomap->type = IOMAP_HOLE;
@@ -309,16 +319,20 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	}
 
 	if (map.m_flags & EROFS_MAP_META) {
-		void *ptr;
-		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
-
 		iomap->type = IOMAP_INLINE;
-		ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
-					 erofs_inode_in_metabox(inode));
-		if (IS_ERR(ptr))
-			return PTR_ERR(ptr);
-		iomap->inline_data = ptr;
-		iomap->private = buf.base;
+		/* read context should read the inlined data */
+		if (ctx) {
+			void *ptr;
+			struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
+
+			ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
+						 erofs_inode_in_metabox(inode));
+			if (IS_ERR(ptr))
+				return PTR_ERR(ptr);
+			iomap->inline_data = ptr;
+			ctx->page = buf.page;
+			ctx->base = buf.base;
+		}
 	} else {
 		iomap->type = IOMAP_MAPPED;
 	}
@@ -328,18 +342,19 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 		ssize_t written, unsigned int flags, struct iomap *iomap)
 {
-	void *ptr = iomap->private;
+	struct erofs_iomap_iter_ctx *ctx;
+	struct iomap_iter *iter;
 
-	if (ptr) {
+	iter = container_of(iomap, struct iomap_iter, iomap);
+	ctx = iter->private;
+	if (ctx && ctx->base) {
 		struct erofs_buf buf = {
-			.page = kmap_to_page(ptr),
-			.base = ptr,
+			.page = ctx->page,
+			.base = ctx->base,
 		};
 
 		DBG_BUGON(iomap->type != IOMAP_INLINE);
 		erofs_put_metabuf(&buf);
-	} else {
-		DBG_BUGON(iomap->type == IOMAP_INLINE);
 	}
 	return written;
 }
@@ -369,18 +384,36 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
  */
 static int erofs_read_folio(struct file *file, struct folio *folio)
 {
+	struct iomap_read_folio_ctx read_ctx = {
+		.ops		= &iomap_bio_read_ops,
+		.cur_folio	= folio,
+	};
+	struct erofs_iomap_iter_ctx iter_ctx = {
+		.page		= NULL,
+		.base		= NULL,
+	};
+
 	trace_erofs_read_folio(folio, true);
 
-	iomap_bio_read_folio(folio, &erofs_iomap_ops);
+	iomap_read_folio(&erofs_iomap_ops, &read_ctx, &iter_ctx);
 	return 0;
 }
 
 static void erofs_readahead(struct readahead_control *rac)
 {
+	struct iomap_read_folio_ctx read_ctx = {
+		.ops		= &iomap_bio_read_ops,
+		.rac		= rac,
+	};
+	struct erofs_iomap_iter_ctx iter_ctx = {
+		.page		= NULL,
+		.base		= NULL,
+	};
+
 	trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
 					readahead_count(rac), true);
 
-	iomap_bio_readahead(rac, &erofs_iomap_ops);
+	iomap_readahead(&erofs_iomap_ops, &read_ctx, &iter_ctx);
 }
 
 static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
@@ -400,9 +433,15 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (IS_DAX(inode))
 		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
 #endif
-	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
+	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev) {
+		struct erofs_iomap_iter_ctx iter_ctx = {
+			.page = NULL,
+			.base = NULL,
+		};
+
 		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
-				    NULL, 0, NULL, 0);
+				    NULL, 0, &iter_ctx, 0);
+	}
 	return filemap_read(iocb, to, 0);
 }
 
-- 
2.22.0
Re: [PATCH v8 2/9] erofs: hold read context in iomap_iter if needed
Posted by Gao Xiang 2 months, 3 weeks ago

On 2025/11/14 17:55, Hongbo Li wrote:
> Uncoming page cache sharing needs pass read context to iomap_iter,
> here we unify the way of passing the read context in EROFS. Moreover,
> bmap and fiemap don't need to map the inline data.
> 
> Note that we keep `struct page *` in `struct erofs_iomap_iter_ctx` as
> well to avoid bogus kmap_to_page usage.
> 
> Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
> ---
>   fs/erofs/data.c | 79 ++++++++++++++++++++++++++++++++++++-------------
>   1 file changed, 59 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index bb13c4cb8455..bd3d85c61341 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -266,14 +266,23 @@ void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
>   	folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
>   }
>   
> +struct erofs_iomap_iter_ctx {
> +	struct page *page;
> +	void *base;
> +};
> +
>   static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>   		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
>   {
>   	int ret;
> +	struct erofs_iomap_iter_ctx *ctx;
>   	struct super_block *sb = inode->i_sb;
>   	struct erofs_map_blocks map;
>   	struct erofs_map_dev mdev;
> +	struct iomap_iter *iter;
>   
> +	iter = container_of(iomap, struct iomap_iter, iomap);
> +	ctx = iter->private;

Can you just rearrange it as:

	struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
	struct erofs_iomap_iter_ctx *ctx = iter->private;

?

>   	map.m_la = offset;
>   	map.m_llen = length;
>   	ret = erofs_map_blocks(inode, &map);
> @@ -283,7 +292,8 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>   	iomap->offset = map.m_la;
>   	iomap->length = map.m_llen;
>   	iomap->flags = 0;
> -	iomap->private = NULL;
> +	if (ctx)
> +		ctx->base = NULL;

I think this line is unnecessary if iter->private == ctx;

>   	iomap->addr = IOMAP_NULL_ADDR;
>   	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
>   		iomap->type = IOMAP_HOLE;
> @@ -309,16 +319,20 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>   	}
>   
>   	if (map.m_flags & EROFS_MAP_META) {
> -		void *ptr;
> -		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
> -
>   		iomap->type = IOMAP_INLINE;
> -		ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
> -					 erofs_inode_in_metabox(inode));
> -		if (IS_ERR(ptr))
> -			return PTR_ERR(ptr);
> -		iomap->inline_data = ptr;
> -		iomap->private = buf.base;
> +		/* read context should read the inlined data */
> +		if (ctx) {
> +			void *ptr;
> +			struct erofs_buf buf = __EROFS_BUF_INITIALIZER;

better to resort them as:
			struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
			void *ptr;

> +
> +			ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
> +						 erofs_inode_in_metabox(inode));
> +			if (IS_ERR(ptr))
> +				return PTR_ERR(ptr);
> +			iomap->inline_data = ptr;
> +			ctx->page = buf.page;
> +			ctx->base = buf.base;
> +		}
>   	} else {
>   		iomap->type = IOMAP_MAPPED;
>   	}
> @@ -328,18 +342,19 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>   static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
>   		ssize_t written, unsigned int flags, struct iomap *iomap)
>   {
> -	void *ptr = iomap->private;
> +	struct erofs_iomap_iter_ctx *ctx;
> +	struct iomap_iter *iter;
>   
> -	if (ptr) {
> +	iter = container_of(iomap, struct iomap_iter, iomap);
> +	ctx = iter->private;
> +	if (ctx && ctx->base) {
>   		struct erofs_buf buf = {
> -			.page = kmap_to_page(ptr),
> -			.base = ptr,
> +			.page = ctx->page,
> +			.base = ctx->base,
>   		};
>   
>   		DBG_BUGON(iomap->type != IOMAP_INLINE);
>   		erofs_put_metabuf(&buf);

so need to nullify ctx->base here:

		ctx->base = NULL;

> -	} else {
> -		DBG_BUGON(iomap->type == IOMAP_INLINE);
>   	}
>   	return written;
>   }
> @@ -369,18 +384,36 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
>    */
>   static int erofs_read_folio(struct file *file, struct folio *folio)
>   {
> +	struct iomap_read_folio_ctx read_ctx = {
> +		.ops		= &iomap_bio_read_ops,
> +		.cur_folio	= folio,
> +	};
> +	struct erofs_iomap_iter_ctx iter_ctx = {
> +		.page		= NULL,
> +		.base		= NULL,
> +	};

it can be initialized just by:
	struct erofs_iomap_iter_ctx iter_ctx = {};

> +
>   	trace_erofs_read_folio(folio, true);
>   
> -	iomap_bio_read_folio(folio, &erofs_iomap_ops);
> +	iomap_read_folio(&erofs_iomap_ops, &read_ctx, &iter_ctx);
>   	return 0;
>   }
>   
>   static void erofs_readahead(struct readahead_control *rac)
>   {
> +	struct iomap_read_folio_ctx read_ctx = {
> +		.ops		= &iomap_bio_read_ops,
> +		.rac		= rac,
> +	};
> +	struct erofs_iomap_iter_ctx iter_ctx = {
> +		.page		= NULL,
> +		.base		= NULL,
> +	};

Same here.

> +
>   	trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
>   					readahead_count(rac), true);
>   
> -	iomap_bio_readahead(rac, &erofs_iomap_ops);
> +	iomap_readahead(&erofs_iomap_ops, &read_ctx, &iter_ctx);
>   }
>   
>   static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
> @@ -400,9 +433,15 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>   	if (IS_DAX(inode))
>   		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
>   #endif
> -	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
> +	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev) {
> +		struct erofs_iomap_iter_ctx iter_ctx = {
> +			.page = NULL,
> +			.base = NULL,
> +		};

Same here again.

Thanks,
Gao Xiang
Re: [PATCH v8 2/9] erofs: hold read context in iomap_iter if needed
Posted by Hongbo Li 2 months, 3 weeks ago
Hi Xiang,

On 2025/11/16 20:01, Gao Xiang wrote:
> 
> 
> On 2025/11/14 17:55, Hongbo Li wrote:
>> Uncoming page cache sharing needs pass read context to iomap_iter,
>> here we unify the way of passing the read context in EROFS. Moreover,
>> bmap and fiemap don't need to map the inline data.
>>
>> Note that we keep `struct page *` in `struct erofs_iomap_iter_ctx` as
>> well to avoid bogus kmap_to_page usage.
>>
>> Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
>> ---
>>   fs/erofs/data.c | 79 ++++++++++++++++++++++++++++++++++++-------------
>>   1 file changed, 59 insertions(+), 20 deletions(-)
>>
>> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
>> index bb13c4cb8455..bd3d85c61341 100644
>> --- a/fs/erofs/data.c
>> +++ b/fs/erofs/data.c
>> @@ -266,14 +266,23 @@ void erofs_onlinefolio_end(struct folio *folio, 
>> int err, bool dirty)
>>       folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
>>   }
>> +struct erofs_iomap_iter_ctx {
>> +    struct page *page;
>> +    void *base;
>> +};
>> +
>>   static int erofs_iomap_begin(struct inode *inode, loff_t offset, 
>> loff_t length,
>>           unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
>>   {
>>       int ret;
>> +    struct erofs_iomap_iter_ctx *ctx;
>>       struct super_block *sb = inode->i_sb;
>>       struct erofs_map_blocks map;
>>       struct erofs_map_dev mdev;
>> +    struct iomap_iter *iter;
>> +    iter = container_of(iomap, struct iomap_iter, iomap);
>> +    ctx = iter->private;
> 
> Can you just rearrange it as:
> 
>      struct iomap_iter *iter = container_of(iomap, struct iomap_iter, 
> iomap);
>      struct erofs_iomap_iter_ctx *ctx = iter->private;
> 
> ?
> 

Thanks for your through review. The points you raised are quite 
reasonable, and I will address them in later version.

Thanks,
Hongbo

>>       map.m_la = offset;
>>       map.m_llen = length;
>>       ret = erofs_map_blocks(inode, &map);
>> @@ -283,7 +292,8 @@ static int erofs_iomap_begin(struct inode *inode, 
>> loff_t offset, loff_t length,
>>       iomap->offset = map.m_la;
>>       iomap->length = map.m_llen;
>>       iomap->flags = 0;
>> -    iomap->private = NULL;
>> +    if (ctx)
>> +        ctx->base = NULL;
> 
> I think this line is unnecessary if iter->private == ctx;
> 
>>       iomap->addr = IOMAP_NULL_ADDR;
>>       if (!(map.m_flags & EROFS_MAP_MAPPED)) {
>>           iomap->type = IOMAP_HOLE;
>> @@ -309,16 +319,20 @@ static int erofs_iomap_begin(struct inode 
>> *inode, loff_t offset, loff_t length,
>>       }
>>       if (map.m_flags & EROFS_MAP_META) {
>> -        void *ptr;
>> -        struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
>> -
>>           iomap->type = IOMAP_INLINE;
>> -        ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
>> -                     erofs_inode_in_metabox(inode));
>> -        if (IS_ERR(ptr))
>> -            return PTR_ERR(ptr);
>> -        iomap->inline_data = ptr;
>> -        iomap->private = buf.base;
>> +        /* read context should read the inlined data */
>> +        if (ctx) {
>> +            void *ptr;
>> +            struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
> 
> better to resort them as:
>              struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
>              void *ptr;
> 
>> +
>> +            ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
>> +                         erofs_inode_in_metabox(inode));
>> +            if (IS_ERR(ptr))
>> +                return PTR_ERR(ptr);
>> +            iomap->inline_data = ptr;
>> +            ctx->page = buf.page;
>> +            ctx->base = buf.base;
>> +        }
>>       } else {
>>           iomap->type = IOMAP_MAPPED;
>>       }
>> @@ -328,18 +342,19 @@ static int erofs_iomap_begin(struct inode 
>> *inode, loff_t offset, loff_t length,
>>   static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t 
>> length,
>>           ssize_t written, unsigned int flags, struct iomap *iomap)
>>   {
>> -    void *ptr = iomap->private;
>> +    struct erofs_iomap_iter_ctx *ctx;
>> +    struct iomap_iter *iter;
>> -    if (ptr) {
>> +    iter = container_of(iomap, struct iomap_iter, iomap);
>> +    ctx = iter->private;
>> +    if (ctx && ctx->base) {
>>           struct erofs_buf buf = {
>> -            .page = kmap_to_page(ptr),
>> -            .base = ptr,
>> +            .page = ctx->page,
>> +            .base = ctx->base,
>>           };
>>           DBG_BUGON(iomap->type != IOMAP_INLINE);
>>           erofs_put_metabuf(&buf);
> 
> so need to nullify ctx->base here:
> 
>          ctx->base = NULL;
> 
>> -    } else {
>> -        DBG_BUGON(iomap->type == IOMAP_INLINE);
>>       }
>>       return written;
>>   }
>> @@ -369,18 +384,36 @@ int erofs_fiemap(struct inode *inode, struct 
>> fiemap_extent_info *fieinfo,
>>    */
>>   static int erofs_read_folio(struct file *file, struct folio *folio)
>>   {
>> +    struct iomap_read_folio_ctx read_ctx = {
>> +        .ops        = &iomap_bio_read_ops,
>> +        .cur_folio    = folio,
>> +    };
>> +    struct erofs_iomap_iter_ctx iter_ctx = {
>> +        .page        = NULL,
>> +        .base        = NULL,
>> +    };
> 
> it can be initialized just by:
>      struct erofs_iomap_iter_ctx iter_ctx = {};
> 
>> +
>>       trace_erofs_read_folio(folio, true);
>> -    iomap_bio_read_folio(folio, &erofs_iomap_ops);
>> +    iomap_read_folio(&erofs_iomap_ops, &read_ctx, &iter_ctx);
>>       return 0;
>>   }
>>   static void erofs_readahead(struct readahead_control *rac)
>>   {
>> +    struct iomap_read_folio_ctx read_ctx = {
>> +        .ops        = &iomap_bio_read_ops,
>> +        .rac        = rac,
>> +    };
>> +    struct erofs_iomap_iter_ctx iter_ctx = {
>> +        .page        = NULL,
>> +        .base        = NULL,
>> +    };
> 
> Same here.
> 
>> +
>>       trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
>>                       readahead_count(rac), true);
>> -    iomap_bio_readahead(rac, &erofs_iomap_ops);
>> +    iomap_readahead(&erofs_iomap_ops, &read_ctx, &iter_ctx);
>>   }
>>   static sector_t erofs_bmap(struct address_space *mapping, sector_t 
>> block)
>> @@ -400,9 +433,15 @@ static ssize_t erofs_file_read_iter(struct kiocb 
>> *iocb, struct iov_iter *to)
>>       if (IS_DAX(inode))
>>           return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
>>   #endif
>> -    if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
>> +    if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev) {
>> +        struct erofs_iomap_iter_ctx iter_ctx = {
>> +            .page = NULL,
>> +            .base = NULL,
>> +        };
> 
> Same here again.
> 
> Thanks,
> Gao Xiang