fs/ext4/inode.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-)
From: Jinke Han <hanjinke.666@bytedance.com>
When disabling and re-enabling dioread_nolock by remount, we may see
dioread_lock in ext4_do_writepages while seeing dioread_nolock in
mpage_map_one_extent. This inconsistency may trigger the warning
in ext4_add_complete_io when the io_end->handle is NULL. Although
this warning is harmless in most cases, there is still a risk of
insufficient log reservation for the conversion of unwritten extents.
Signed-off-by: Jinke Han <hanjinke.666@bytedance.com>
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/inode.c | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 40579ef513b7..122a22ccddb3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1568,6 +1568,7 @@ struct mpage_da_data {
struct ext4_io_submit io_submit; /* IO submission data */
unsigned int do_map:1;
unsigned int scanned_until_end:1;
+ unsigned int dioread_nolock:1;
};
static void mpage_release_unused_pages(struct mpage_da_data *mpd,
@@ -2391,7 +2392,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
struct inode *inode = mpd->inode;
struct ext4_map_blocks *map = &mpd->map;
int get_blocks_flags;
- int err, dioread_nolock;
+ int err, dioread_nolock = mpd->dioread_nolock;
trace_ext4_da_write_pages_extent(inode, map);
/*
@@ -2412,7 +2413,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL |
EXT4_GET_BLOCKS_IO_SUBMIT;
- dioread_nolock = ext4_should_dioread_nolock(inode);
if (dioread_nolock)
get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
if (map->m_flags & BIT(BH_Delay))
@@ -2727,10 +2727,11 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
handle_t *handle = NULL;
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
- int needed_blocks, rsv_blocks = 0, ret = 0;
+ int needed_blocks, rsv_blocks = 0, rsv = 0, ret = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
struct blk_plug plug;
bool give_up_on_write = false;
+ bool dioread_nolock;
trace_ext4_writepages(inode, wbc);
@@ -2783,15 +2784,6 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
ext4_journal_stop(handle);
}
- if (ext4_should_dioread_nolock(inode)) {
- /*
- * We may need to convert up to one extent per block in
- * the page and we may dirty the inode.
- */
- rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
- PAGE_SIZE >> inode->i_blkbits);
- }
-
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
@@ -2837,6 +2829,18 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
goto unplug;
while (!mpd->scanned_until_end && wbc->nr_to_write > 0) {
+ dioread_nolock = ext4_should_dioread_nolock(inode);
+ if (!rsv && dioread_nolock) {
+ /*
+ * We may need to convert up to one extent per block in
+ * the page and we may dirty the inode.
+ */
+ rsv = 1 + ext4_chunk_trans_blocks(inode,
+ PAGE_SIZE >> inode->i_blkbits);
+ }
+ rsv_blocks = dioread_nolock ? rsv : 0;
+ mpd->dioread_nolock = dioread_nolock;
+
/* For each extent of pages we use new io_end */
mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
if (!mpd->io_submit.io_end) {
--
2.20.1
Jinke Han <hanjinke.666@bytedance.com> writes: > From: Jinke Han <hanjinke.666@bytedance.com> > > When disable and enable dioread_nolock by remount, we may see > dioread_lock in ext4_do_writepages while see dioread_nolock in > mpage_map_one_extent. This inconsistency may triger the warning > in ext4_add_complete_io when the io_end->handle is NULL. Although > this warning is harmless in most cases, there is still a risk of > insufficient log reservation in conversion of unwritten extents. > Sorry, I haven't completely gone through the patch yet. But this idea of caching the initial value of a mount parameter and passing it to different functions while an I/O request completes, is not looking right to me. If that's the case shouldn't we disallow this mount option to change until all the outstanding I/Os are done or complete? Then we need not cache the value of dioread_nolock at the start of writepages and continue to pass it down in case it changes. Just my initial thoughts. -ritesh > Signed-off-by: Jinke Han <hanjinke.666@bytedance.com> > Signed-off-by: Zhang Yi <yi.zhang@huawei.com> > --- > fs/ext4/inode.c | 28 ++++++++++++++++------------ > 1 file changed, 16 insertions(+), 12 deletions(-) > > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 40579ef513b7..122a22ccddb3 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -1568,6 +1568,7 @@ struct mpage_da_data { > struct ext4_io_submit io_submit; /* IO submission data */ > unsigned int do_map:1; > unsigned int scanned_until_end:1; > + unsigned int dioread_nolock:1; > }; > > static void mpage_release_unused_pages(struct mpage_da_data *mpd, > @@ -2391,7 +2392,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) > struct inode *inode = mpd->inode; > struct ext4_map_blocks *map = &mpd->map; > int get_blocks_flags; > - int err, dioread_nolock; > + int err, dioread_nolock = mpd->dioread_nolock; > > trace_ext4_da_write_pages_extent(inode, map); > /* > @@ -2412,7 +2413,6 @@ 
static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) > get_blocks_flags = EXT4_GET_BLOCKS_CREATE | > EXT4_GET_BLOCKS_METADATA_NOFAIL | > EXT4_GET_BLOCKS_IO_SUBMIT; > - dioread_nolock = ext4_should_dioread_nolock(inode); > if (dioread_nolock) > get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; > if (map->m_flags & BIT(BH_Delay)) > @@ -2727,10 +2727,11 @@ static int ext4_do_writepages(struct mpage_da_data *mpd) > handle_t *handle = NULL; > struct inode *inode = mpd->inode; > struct address_space *mapping = inode->i_mapping; > - int needed_blocks, rsv_blocks = 0, ret = 0; > + int needed_blocks, rsv_blocks = 0, rsv = 0, ret = 0; > struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); > struct blk_plug plug; > bool give_up_on_write = false; > + bool dioread_nolock; > > trace_ext4_writepages(inode, wbc); > > @@ -2783,15 +2784,6 @@ static int ext4_do_writepages(struct mpage_da_data *mpd) > ext4_journal_stop(handle); > } > > - if (ext4_should_dioread_nolock(inode)) { > - /* > - * We may need to convert up to one extent per block in > - * the page and we may dirty the inode. > - */ > - rsv_blocks = 1 + ext4_chunk_trans_blocks(inode, > - PAGE_SIZE >> inode->i_blkbits); > - } > - > if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) > range_whole = 1; > > @@ -2837,6 +2829,18 @@ static int ext4_do_writepages(struct mpage_da_data *mpd) > goto unplug; > > while (!mpd->scanned_until_end && wbc->nr_to_write > 0) { > + dioread_nolock = ext4_should_dioread_nolock(inode); > + if (!rsv && dioread_nolock) { > + /* > + * We may need to convert up to one extent per block in > + * the page and we may dirty the inode. > + */ > + rsv = 1 + ext4_chunk_trans_blocks(inode, > + PAGE_SIZE >> inode->i_blkbits); > + } > + rsv_blocks = dioread_nolock ? rsv : 0; > + mpd->dioread_nolock = dioread_nolock; > + > /* For each extent of pages we use new io_end */ > mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); > if (!mpd->io_submit.io_end) { > -- > 2.20.1
On 2023/2/16 at 9:13 PM, Ritesh Harjani (IBM) wrote: > Jinke Han <hanjinke.666@bytedance.com> writes: > >> From: Jinke Han <hanjinke.666@bytedance.com> >> >> When disable and enable dioread_nolock by remount, we may see >> dioread_lock in ext4_do_writepages while see dioread_nolock in >> mpage_map_one_extent. This inconsistency may triger the warning >> in ext4_add_complete_io when the io_end->handle is NULL. Although >> this warning is harmless in most cases, there is still a risk of >> insufficient log reservation in conversion of unwritten extents. >> > > Sorry, I haven't completely gone through the patch yet. But this idea of > caching the initial value of mount parameter and passing it to different > functions while an I/O request completes, is not looking right to me. > > If that's the case shouldn't we disallow this mount option to change > until all the outstanding I/O's are done or complete? > Then we need not cache the value of dioread_nolock at the start of > writepages and continue to pass it down in case it changes. > > Just my initial thoughts. > > -ritesh > Fair enough, thanks.
© 2016 - 2025 Red Hat, Inc.