[PATCH v2] ext4: make dioread_nolock consistent in each mapping round

Jinke Han posted 1 patch 2 years, 7 months ago
fs/ext4/inode.c | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
[PATCH v2] ext4: make dioread_nolock consistent in each mapping round
Posted by Jinke Han 2 years, 7 months ago
From: Jinke Han <hanjinke.666@bytedance.com>

When dioread_nolock is disabled and enabled by remount, we may see
dioread_lock in ext4_do_writepages while seeing dioread_nolock in
mpage_map_one_extent. This inconsistency may trigger the warning
in ext4_add_complete_io when the io_end->handle is NULL. Although
this warning is harmless in most cases, there is still a risk of
insufficient log reservation in conversion of unwritten extents.

Signed-off-by: Jinke Han <hanjinke.666@bytedance.com>
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 fs/ext4/inode.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 40579ef513b7..122a22ccddb3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1568,6 +1568,7 @@ struct mpage_da_data {
 	struct ext4_io_submit io_submit;	/* IO submission data */
 	unsigned int do_map:1;
 	unsigned int scanned_until_end:1;
+	unsigned int dioread_nolock:1;
 };
 
 static void mpage_release_unused_pages(struct mpage_da_data *mpd,
@@ -2391,7 +2392,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
 	struct inode *inode = mpd->inode;
 	struct ext4_map_blocks *map = &mpd->map;
 	int get_blocks_flags;
-	int err, dioread_nolock;
+	int err, dioread_nolock = mpd->dioread_nolock;
 
 	trace_ext4_da_write_pages_extent(inode, map);
 	/*
@@ -2412,7 +2413,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
 	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
 			   EXT4_GET_BLOCKS_METADATA_NOFAIL |
 			   EXT4_GET_BLOCKS_IO_SUBMIT;
-	dioread_nolock = ext4_should_dioread_nolock(inode);
 	if (dioread_nolock)
 		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
 	if (map->m_flags & BIT(BH_Delay))
@@ -2727,10 +2727,11 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
 	handle_t *handle = NULL;
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
-	int needed_blocks, rsv_blocks = 0, ret = 0;
+	int needed_blocks, rsv_blocks = 0, rsv = 0, ret = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 	struct blk_plug plug;
 	bool give_up_on_write = false;
+	bool dioread_nolock;
 
 	trace_ext4_writepages(inode, wbc);
 
@@ -2783,15 +2784,6 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
 		ext4_journal_stop(handle);
 	}
 
-	if (ext4_should_dioread_nolock(inode)) {
-		/*
-		 * We may need to convert up to one extent per block in
-		 * the page and we may dirty the inode.
-		 */
-		rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
-						PAGE_SIZE >> inode->i_blkbits);
-	}
-
 	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 		range_whole = 1;
 
@@ -2837,6 +2829,18 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
 		goto unplug;
 
 	while (!mpd->scanned_until_end && wbc->nr_to_write > 0) {
+		dioread_nolock = ext4_should_dioread_nolock(inode);
+		if (!rsv && dioread_nolock) {
+			/*
+			 * We may need to convert up to one extent per block in
+			 * the page and we may dirty the inode.
+			 */
+			rsv = 1 + ext4_chunk_trans_blocks(inode,
+							PAGE_SIZE >> inode->i_blkbits);
+		}
+		rsv_blocks = dioread_nolock ? rsv : 0;
+		mpd->dioread_nolock = dioread_nolock;
+
 		/* For each extent of pages we use new io_end */
 		mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
 		if (!mpd->io_submit.io_end) {
-- 
2.20.1
Re: [PATCH v2] ext4: make dioread_nolock consistent in each mapping round
Posted by Ritesh Harjani (IBM) 2 years, 6 months ago
Jinke Han <hanjinke.666@bytedance.com> writes:

> From: Jinke Han <hanjinke.666@bytedance.com>
>
> When disable and enable dioread_nolock by remount, we may see
> dioread_lock in ext4_do_writepages while see dioread_nolock in
> mpage_map_one_extent. This inconsistency may triger the warning
> in ext4_add_complete_io when the io_end->handle is NULL. Although
> this warning is harmless in most cases, there is still a risk of
> insufficient log reservation in conversion of unwritten extents.
>

Sorry, I haven't completely gone through the patch yet. But this idea of
caching the initial value of mount parameter and passing it to different
functions while an I/O request completes, is not looking right to me.

If that's the case shouldn't we disallow this mount option to change
until all the outstanding I/O's are done or complete?
Then we need not cache the value of dioread_nolock at the start of
writepages and continue to pass it down in case it changes.

Just my initial thoughts.

-ritesh

> Signed-off-by: Jinke Han <hanjinke.666@bytedance.com>
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> ---
>  fs/ext4/inode.c | 28 ++++++++++++++++------------
>  1 file changed, 16 insertions(+), 12 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 40579ef513b7..122a22ccddb3 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1568,6 +1568,7 @@ struct mpage_da_data {
>  	struct ext4_io_submit io_submit;	/* IO submission data */
>  	unsigned int do_map:1;
>  	unsigned int scanned_until_end:1;
> +	unsigned int dioread_nolock:1;
>  };
>
>  static void mpage_release_unused_pages(struct mpage_da_data *mpd,
> @@ -2391,7 +2392,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
>  	struct inode *inode = mpd->inode;
>  	struct ext4_map_blocks *map = &mpd->map;
>  	int get_blocks_flags;
> -	int err, dioread_nolock;
> +	int err, dioread_nolock = mpd->dioread_nolock;
>
>  	trace_ext4_da_write_pages_extent(inode, map);
>  	/*
> @@ -2412,7 +2413,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
>  	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
>  			   EXT4_GET_BLOCKS_METADATA_NOFAIL |
>  			   EXT4_GET_BLOCKS_IO_SUBMIT;
> -	dioread_nolock = ext4_should_dioread_nolock(inode);
>  	if (dioread_nolock)
>  		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
>  	if (map->m_flags & BIT(BH_Delay))
> @@ -2727,10 +2727,11 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
>  	handle_t *handle = NULL;
>  	struct inode *inode = mpd->inode;
>  	struct address_space *mapping = inode->i_mapping;
> -	int needed_blocks, rsv_blocks = 0, ret = 0;
> +	int needed_blocks, rsv_blocks = 0, rsv = 0, ret = 0;
>  	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
>  	struct blk_plug plug;
>  	bool give_up_on_write = false;
> +	bool dioread_nolock;
>
>  	trace_ext4_writepages(inode, wbc);
>
> @@ -2783,15 +2784,6 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
>  		ext4_journal_stop(handle);
>  	}
>
> -	if (ext4_should_dioread_nolock(inode)) {
> -		/*
> -		 * We may need to convert up to one extent per block in
> -		 * the page and we may dirty the inode.
> -		 */
> -		rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
> -						PAGE_SIZE >> inode->i_blkbits);
> -	}
> -
>  	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
>  		range_whole = 1;
>
> @@ -2837,6 +2829,18 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
>  		goto unplug;
>
>  	while (!mpd->scanned_until_end && wbc->nr_to_write > 0) {
> +		dioread_nolock = ext4_should_dioread_nolock(inode);
> +		if (!rsv && dioread_nolock) {
> +			/*
> +			 * We may need to convert up to one extent per block in
> +			 * the page and we may dirty the inode.
> +			 */
> +			rsv = 1 + ext4_chunk_trans_blocks(inode,
> +							PAGE_SIZE >> inode->i_blkbits);
> +		}
> +		rsv_blocks = dioread_nolock ? rsv : 0;
> +		mpd->dioread_nolock = dioread_nolock;
> +
>  		/* For each extent of pages we use new io_end */
>  		mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
>  		if (!mpd->io_submit.io_end) {
> --
> 2.20.1
Re: [External] Re: [PATCH v2] ext4: make dioread_nolock consistent in each mapping round
Posted by hanjinke 2 years, 6 months ago

在 2023/2/16 下午9:13, Ritesh Harjani (IBM) 写道:
> Jinke Han <hanjinke.666@bytedance.com> writes:
> 
>> From: Jinke Han <hanjinke.666@bytedance.com>
>>
>> When disable and enable dioread_nolock by remount, we may see
>> dioread_lock in ext4_do_writepages while see dioread_nolock in
>> mpage_map_one_extent. This inconsistency may triger the warning
>> in ext4_add_complete_io when the io_end->handle is NULL. Although
>> this warning is harmless in most cases, there is still a risk of
>> insufficient log reservation in conversion of unwritten extents.
>>
> 
> Sorry, I haven't completely gone through the patch yet. But this idea of
> caching the initial value of mount parameter and passing it do different
> functions while an I/O request completes, is not looking right to me.
> 
> If that's the case shouldn't we disallow this mount option to change
> until all the outstanding I/O's are done or complete?
> Then we need not cache the value of dioread_nolock at the start of
> writepages and continue to pass it down in case it it changes.
> 
> Just my initial thoughts.
> 
> -ritesh
> 

Fair enough, thanks.