Call ext4_mb_mark_group_bb in ext4_free_blocks_simple to:
1. remove duplicated code
2. pair the update of free_clusters with the one in ext4_mb_new_blocks_simple.
3. add the missing ext4_lock_group/ext4_unlock_group protection.
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
---
fs/ext4/mballoc.c | 39 +++++++--------------------------------
1 file changed, 7 insertions(+), 32 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 685dcc17bf7c..dae4533411f7 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3798,6 +3798,8 @@ ext4_mb_mark_group_bb(struct ext4_mark_context *mc, ext4_group_t group,
ext4_lock_group(sb, group);
if (ext4_has_group_desc_csum(sb) &&
(gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+ WARN_ON(mc->state == 0);
+
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
ext4_free_group_clusters_set(sb, gdp,
ext4_free_clusters_after_init(sb, group, gdp));
@@ -6120,43 +6122,16 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
unsigned long count)
{
- struct buffer_head *bitmap_bh;
+ struct ext4_mark_context mc = {
+ .sb = inode->i_sb,
+ .state = 0,
+ };
struct super_block *sb = inode->i_sb;
- struct ext4_group_desc *gdp;
- struct buffer_head *gdp_bh;
ext4_group_t group;
ext4_grpblk_t blkoff;
- int already_freed = 0, err, i;
ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
- bitmap_bh = ext4_read_block_bitmap(sb, group);
- if (IS_ERR(bitmap_bh)) {
- pr_warn("Failed to read block bitmap\n");
- return;
- }
- gdp = ext4_get_group_desc(sb, group, &gdp_bh);
- if (!gdp)
- goto err_out;
-
- for (i = 0; i < count; i++) {
- if (!mb_test_bit(blkoff + i, bitmap_bh->b_data))
- already_freed++;
- }
- mb_clear_bits(bitmap_bh->b_data, blkoff, count);
- err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
- if (err)
- goto err_out;
- ext4_free_group_clusters_set(
- sb, gdp, ext4_free_group_clusters(sb, gdp) +
- count - already_freed);
- ext4_block_bitmap_csum_set(sb, gdp, bitmap_bh);
- ext4_group_desc_csum_set(sb, group, gdp);
- ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
- sync_dirty_buffer(bitmap_bh);
- sync_dirty_buffer(gdp_bh);
-
-err_out:
- brelse(bitmap_bh);
+ ext4_mb_mark_group_bb(&mc, group, blkoff, count);
}
/**
--
2.30.0
On Sat, Jun 03, 2023 at 11:03:21PM +0800, Kemeng Shi wrote: > call ext4_mb_mark_group_bb in ext4_free_blocks_simple to: > 1. remove repeat code > 2. pair update of free_clusters in ext4_mb_new_blocks_simple. > 3. add missing ext4_lock_group/ext4_unlock_group protection. > > Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com> > Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> Note: after bisecting, I've found that this commit is causing a OOPS when running "kvm-xfstests -c ext4/adv generic/468". It appears to be an issue with the fast commit feature not playing nice with this patch. The stack trace looks like this: [ 7.409663] ------------[ cut here ]------------ [ 7.409969] WARNING: CPU: 0 PID: 3069 at fs/ext4/mballoc.c:3801 ext4_mb_mark_group_bb+0x48e/0x4a0 [ 7.410480] CPU: 0 PID: 3069 Comm: mount Not tainted 6.4.0-rc5-xfstests-lockdep-00021-g60ba685c5998 #146 [ 7.411067] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 7.411639] RIP: 0010:ext4_mb_mark_group_bb+0x48e/0x4a0 [ 7.411968] Code: 48 c7 c7 35 b0 88 82 c6 05 16 f4 9b 01 01 e8 f9 16 c9 ff e9 7f fe ff ff 8b 45 08 c7 44 24 10 00 00 00 00 31 c9 e9 ef fc ff ff <0f> 0b e9 76 fc ff ff e8 96 64 b6 00 66 0f 1f 44 00 00 90 90 90 90 [ 7.413128] RSP: 0018:ffffc90003b0f9f8 EFLAGS: 00010246 [ 7.413458] RAX: 0000000000000003 RBX: 0000000000006002 RCX: 0000000000000001 [ 7.413902] RDX: ffff88800965b000 RSI: 0000000000000000 RDI: ffff88800d690100 [ 7.414346] RBP: ffffc90003b0fa68 R08: 000000000aebbd6e R09: 0000000000000246 [ 7.414791] R10: 00000000d148c994 R11: 00000000941da2bb R12: ffff88800d7fd000 [ 7.415234] R13: 0000000000000000 R14: ffff88800f3e4080 R15: ffff88800b5ca160 [ 7.415724] FS: 00007f3d04516840(0000) GS:ffff88807da00000(0000) knlGS:0000000000000000 [ 7.416227] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7.416588] CR2: 00007ffcb3979ac8 CR3: 000000000f290003 CR4: 0000000000770ef0 [ 7.417032] PKRU: 55555554 [ 7.417205] Call Trace: [ 7.417363] <TASK> [ 
7.417502] ? ext4_mb_mark_group_bb+0x48e/0x4a0 [ 7.417807] ? __warn+0x80/0x170 [ 7.418051] ? ext4_mb_mark_group_bb+0x48e/0x4a0 [ 7.418337] ? report_bug+0x173/0x1d0 [ 7.418567] ? handle_bug+0x3c/0x70 [ 7.418797] ? exc_invalid_op+0x17/0x70 [ 7.419037] ? asm_exc_invalid_op+0x1a/0x20 [ 7.419226] ? ext4_mb_mark_group_bb+0x48e/0x4a0 [ 7.419437] ? ext4_mb_mark_group_bb+0xae/0x4a0 [ 7.419708] ext4_mb_mark_bb+0xc0/0x120 [ 7.419946] ext4_ext_clear_bb+0x210/0x280 [ 7.420198] ext4_fc_replay_inode+0xa1/0x380 [ 7.420466] ext4_fc_replay+0x435/0x880 [ 7.420703] ? __getblk_gfp+0x37/0x110 [ 7.420938] ? jread+0x7a/0x180 [ 7.421138] do_one_pass+0x7df/0x1040 [ 7.421365] jbd2_journal_recover+0x150/0x250 [ 7.421637] jbd2_journal_load+0xbe/0x190 [ 7.421886] ext4_load_journal+0x214/0x610 [ 7.422152] ext4_load_and_init_journal+0x29/0x380 [ 7.422490] __ext4_fill_super+0x15ca/0x15e0 [ 7.422756] ? __pfx_ext4_fill_super+0x10/0x10 [ 7.423032] ext4_fill_super+0xcf/0x280 [ 7.423270] get_tree_bdev+0x188/0x290 [ 7.423505] vfs_get_tree+0x29/0xe0 [ 7.423723] ? 
capable+0x37/0x70 [ 7.423927] do_new_mount+0x174/0x300 [ 7.424157] __x64_sys_mount+0x11a/0x150 [ 7.424401] do_syscall_64+0x3b/0x90 [ 7.424624] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 7.424935] RIP: 0033:0x7f3d0475562a [ 7.425160] Code: 48 8b 0d 69 18 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 36 18 0d 00 f7 d8 64 89 01 48 [ 7.426298] RSP: 002b:00007ffcb397aaf8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [ 7.426761] RAX: ffffffffffffffda RBX: 00007f3d04889264 RCX: 00007f3d0475562a [ 7.427197] RDX: 0000558ea381db90 RSI: 0000558ea381dbb0 RDI: 0000558ea381dbd0 [ 7.427631] RBP: 0000558ea381d960 R08: 0000558ea381dbf0 R09: 00007f3d04827be0 [ 7.428063] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 7.428499] R13: 0000558ea381dbd0 R14: 0000558ea381db90 R15: 0000558ea381d960 [ 7.428941] </TASK> [ 7.429083] irq event stamp: 10951 [ 7.429296] hardirqs last enabled at (10959): [<ffffffff811643c2>] __up_console_sem+0x52/0x60 [ 7.429824] hardirqs last disabled at (10966): [<ffffffff811643a7>] __up_console_sem+0x37/0x60 [ 7.430325] softirqs last enabled at (10574): [<ffffffff8204a529>] __do_softirq+0x2d9/0x39e [ 7.430839] softirqs last disabled at (10407): [<ffffffff810dcc57>] __irq_exit_rcu+0x87/0xb0 [ 7.431354] ---[ end trace 0000000000000000 ]---
on 6/11/2023 1:05 PM, Theodore Ts'o wrote: > On Sat, Jun 03, 2023 at 11:03:21PM +0800, Kemeng Shi wrote: >> call ext4_mb_mark_group_bb in ext4_free_blocks_simple to: >> 1. remove repeat code >> 2. pair update of free_clusters in ext4_mb_new_blocks_simple. >> 3. add missing ext4_lock_group/ext4_unlock_group protection. >> >> Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com> >> Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com> > > Note: after bisecting, I've found that this commit is causing a OOPS > when running "kvm-xfstests -c ext4/adv generic/468". It appears to be > an issue with the fast commit feature not playing nice with this > patch. The stack trace looks like this: > > [ 7.409663] ------------[ cut here ]------------ > [ 7.409969] WARNING: CPU: 0 PID: 3069 at fs/ext4/mballoc.c:3801 ext4_mb_mark_group_bb+0x48e/0x4a0 > [ 7.410480] CPU: 0 PID: 3069 Comm: mount Not tainted 6.4.0-rc5-xfstests-lockdep-00021-g60ba685c5998 #146 > [ 7.411067] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 > [ 7.411639] RIP: 0010:ext4_mb_mark_group_bb+0x48e/0x4a0 > [ 7.411968] Code: 48 c7 c7 35 b0 88 82 c6 05 16 f4 9b 01 01 e8 f9 16 c9 ff e9 7f fe ff ff 8b 45 08 c7 44 24 10 00 00 00 00 31 c9 e9 ef fc ff ff <0f> 0b e9 76 fc ff ff e8 96 64 b6 00 66 0f 1f 44 00 00 90 90 90 90 > [ 7.413128] RSP: 0018:ffffc90003b0f9f8 EFLAGS: 00010246 > [ 7.413458] RAX: 0000000000000003 RBX: 0000000000006002 RCX: 0000000000000001 > [ 7.413902] RDX: ffff88800965b000 RSI: 0000000000000000 RDI: ffff88800d690100 > [ 7.414346] RBP: ffffc90003b0fa68 R08: 000000000aebbd6e R09: 0000000000000246 > [ 7.414791] R10: 00000000d148c994 R11: 00000000941da2bb R12: ffff88800d7fd000 > [ 7.415234] R13: 0000000000000000 R14: ffff88800f3e4080 R15: ffff88800b5ca160 > [ 7.415724] FS: 00007f3d04516840(0000) GS:ffff88807da00000(0000) knlGS:0000000000000000 > [ 7.416227] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 7.416588] CR2: 00007ffcb3979ac8 CR3: 000000000f290003 CR4: 
0000000000770ef0 > [ 7.417032] PKRU: 55555554 > [ 7.417205] Call Trace: > [ 7.417363] <TASK> > [ 7.417502] ? ext4_mb_mark_group_bb+0x48e/0x4a0 > [ 7.417807] ? __warn+0x80/0x170 > [ 7.418051] ? ext4_mb_mark_group_bb+0x48e/0x4a0 > [ 7.418337] ? report_bug+0x173/0x1d0 > [ 7.418567] ? handle_bug+0x3c/0x70 > [ 7.418797] ? exc_invalid_op+0x17/0x70 > [ 7.419037] ? asm_exc_invalid_op+0x1a/0x20 > [ 7.419226] ? ext4_mb_mark_group_bb+0x48e/0x4a0 > [ 7.419437] ? ext4_mb_mark_group_bb+0xae/0x4a0 > [ 7.419708] ext4_mb_mark_bb+0xc0/0x120 > [ 7.419946] ext4_ext_clear_bb+0x210/0x280 > [ 7.420198] ext4_fc_replay_inode+0xa1/0x380 > [ 7.420466] ext4_fc_replay+0x435/0x880 > [ 7.420703] ? __getblk_gfp+0x37/0x110 > [ 7.420938] ? jread+0x7a/0x180 > [ 7.421138] do_one_pass+0x7df/0x1040 > [ 7.421365] jbd2_journal_recover+0x150/0x250 > [ 7.421637] jbd2_journal_load+0xbe/0x190 > [ 7.421886] ext4_load_journal+0x214/0x610 > [ 7.422152] ext4_load_and_init_journal+0x29/0x380 > [ 7.422490] __ext4_fill_super+0x15ca/0x15e0 > [ 7.422756] ? __pfx_ext4_fill_super+0x10/0x10 > [ 7.423032] ext4_fill_super+0xcf/0x280 > [ 7.423270] get_tree_bdev+0x188/0x290 > [ 7.423505] vfs_get_tree+0x29/0xe0 > [ 7.423723] ? 
capable+0x37/0x70 > [ 7.423927] do_new_mount+0x174/0x300 > [ 7.424157] __x64_sys_mount+0x11a/0x150 > [ 7.424401] do_syscall_64+0x3b/0x90 > [ 7.424624] entry_SYSCALL_64_after_hwframe+0x72/0xdc > [ 7.424935] RIP: 0033:0x7f3d0475562a > [ 7.425160] Code: 48 8b 0d 69 18 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 36 18 0d 00 f7 d8 64 89 01 48 > [ 7.426298] RSP: 002b:00007ffcb397aaf8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 > [ 7.426761] RAX: ffffffffffffffda RBX: 00007f3d04889264 RCX: 00007f3d0475562a > [ 7.427197] RDX: 0000558ea381db90 RSI: 0000558ea381dbb0 RDI: 0000558ea381dbd0 > [ 7.427631] RBP: 0000558ea381d960 R08: 0000558ea381dbf0 R09: 00007f3d04827be0 > [ 7.428063] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 > [ 7.428499] R13: 0000558ea381dbd0 R14: 0000558ea381db90 R15: 0000558ea381d960 > [ 7.428941] </TASK> > [ 7.429083] irq event stamp: 10951 > [ 7.429296] hardirqs last enabled at (10959): [<ffffffff811643c2>] __up_console_sem+0x52/0x60 > [ 7.429824] hardirqs last disabled at (10966): [<ffffffff811643a7>] __up_console_sem+0x37/0x60 > [ 7.430325] softirqs last enabled at (10574): [<ffffffff8204a529>] __do_softirq+0x2d9/0x39e > [ 7.430839] softirqs last disabled at (10407): [<ffffffff810dcc57>] __irq_exit_rcu+0x87/0xb0 > [ 7.431354] ---[ end trace 0000000000000000 ]--- > > Hi ted, sorry for this issue. This patch added a WARN_ON for case that we free block to uninitialized block group which should be invalid. We can simply remove the WARN_ON to allow free on uninitialized block group as old way for emergency fix and I will find out why we free blocks to uninitialized block group in fast commit code path and is it a valid behavior. -- Best wishes Kemeng Shi
On Mon, Jun 12, 2023 at 10:24:55AM +0800, Kemeng Shi wrote: > Hi ted, sorry for this issue. This patch added a WARN_ON for case that we free block > to uninitialized block group which should be invalid. > We can simply remove the WARN_ON to allow free on uninitialized block group as old > way for emergency fix and I will find out why we free blocks to uninitialized block > group in fast commit code path and is it a valid behavior. What I've done for now in the dev branch was to drop patches 12 through 19 of this patch series. That seemed to be a good break point, and I wanted to make sure we had something working so we can start doing a lot more intesive testing on the patches so far. Also, that way, when you resend the last 8 patches in the patch series, we can make sure they get a proper review as opposed to making changes on the fly. The current contents of the dev branch are: % git log --reverse --oneline origin..dev 40fa8be3852f ext4: kill unused function ext4_journalled_write_inline_data a030569c34be ext4: Change remaining tracepoints to use folio d1ffc6fb5ded ext4: Make mpage_journal_page_buffers use folio 5ac99c22fa84 ext4: Make ext4_write_inline_data_end() use folio d578dfc510cf ext4: Call fsverity_verify_folio() 30f0bd64ed09 ext4: fix wrong unit use in ext4_mb_normalize_request b9dc976cc348 ext4: fix unit mismatch in ext4_mb_new_blocks_simple 9afc5e21107a ext4: fix wrong unit use in ext4_mb_find_by_goal 860f86ccff6e ext4: treat stripe in block unit 710c384f1536 ext4: add EXT4_MB_HINT_GOAL_ONLY test in ext4_mb_use_preallocated f242d8a98a6f ext4: remove ext4_block_group and ext4_block_group_offset declaration 5b859728b98b ext4: try all groups in ext4_mb_new_blocks_simple ea7bbd168135 ext4: get block from bh before pass it to ext4_free_blocks_simple in ext4_free_blocks 757d9100a5d1 ext4: remove unsed parameter and unnecessary forward declaration of ext4_mb_new_blocks_simple 5d62e6da25f5 ext4: fix wrong unit use in ext4_mb_clear_bb 993d22f0a250 ext4: fix 
wrong unit use in ext4_mb_new_blocks bf4f2aa4844a ext4: mballoc: Remove useless setting of ac_criteria 743f4dd07bf9 ext4: Remove unused extern variables declaration bc40109767b3 ext4: Convert mballoc cr (criteria) to enum 52e3814a1342 ext4: Add per CR extent scanned counter a15c09da1255 ext4: Add counter to track successful allocation of goal length 26cbe38f0275 ext4: Avoid scanning smaller extents in BG during CR1 9c8f8195852c ext4: Don't skip prefetching BLOCK_UNINIT groups cd303d98b9b5 ext4: Ensure ext4_mb_prefetch_fini() is called for all prefetched BGs ea639ce794e5 ext4: Abstract out logic to search average fragment list b080c84db854 ext4: Add allocation criteria 1.5 (CR1_5) 3a08f7ac3bfa ext4: Give symbolic names to mballoc criterias d14b5d0b1373 ext4: only update i_reserved_data_blocks on successful block allocation b352d1f09a20 ext4: add a new helper to check if es must be kept 579c020ea7b7 ext4: factor out __es_alloc_extent() and __es_free_extent() f4ddcde91d00 ext4: use pre-allocated es in __es_insert_extent() e77481862663 ext4: use pre-allocated es in __es_remove_extent() 28774513875c ext4: using nofail preallocation in ext4_es_remove_extent() e109a1db5b09 ext4: using nofail preallocation in ext4_es_insert_delayed_block() 14d876070f03 ext4: using nofail preallocation in ext4_es_insert_extent() 2af6f615b18b ext4: make ext4_es_remove_extent() return void 0ee9cccd1971 ext4: make ext4_es_insert_delayed_block() return void 7a7c285c485d ext4: make ext4_es_insert_extent() return void 9d1c6dea1aa3 ext4: make ext4_zeroout_es() return void 2e3f4cdef544 ext4: clean up mballoc criteria comments acef67482edf ext4: allow concurrent unaligned dio overwrites 63bc068f0d1a ext4: Fix reusing stale buffer heads from last failed mounting 3a57c2f88be3 ext4: ext4_put_super: Remove redundant checking for 'sbi->s_journal_bdev' 6b960d2155f9 jbd2: remove unused feature macros 4b049709e652 jbd2: switch to check format version in superblock directly d9eafe0afafa jbd2: factor out 
journal initialization from journal_get_superblock() 6eecd1f4c7ef jbd2: remove j_format_version 431ca11fafd3 jbd2: continue to record log between each mount 2ea31402649c ext4: add journal cycled recording support a228f0e153f6 ext4: update doc about journal superblock description f9c45d83f4da ext4: turning quotas off if mount failed after enable quotas 5404e4738054 ext4: refactoring to use the unified helper ext4_quotas_off() d3ab1bca26b4 jbd2: recheck chechpointing non-dirty buffer 7b0cfe40a991 jbd2: remove t_checkpoint_io_list e86f802ab8d4 jbd2: remove journal_clean_one_cp_list() e8ece5c78f36 jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint cdffaad9649e jbd2: fix a race when checking checkpoint buffer busy 11761ed6026e jbd2: remove __journal_try_to_free_buffer() Cheers, - Ted
on 6/12/2023 11:49 AM, Theodore Ts'o wrote: > On Mon, Jun 12, 2023 at 10:24:55AM +0800, Kemeng Shi wrote: > >> Hi ted, sorry for this issue. This patch added a WARN_ON for case that we free block >> to uninitialized block group which should be invalid. >> We can simply remove the WARN_ON to allow free on uninitialized block group as old >> way for emergency fix and I will find out why we free blocks to uninitialized block >> group in fast commit code path and is it a valid behavior. > > What I've done for now in the dev branch was to drop patches 12 > through 19 of this patch series. That seemed to be a good break > point, and I wanted to make sure we had something working so we can > start doing a lot more intesive testing on the patches so far. > > Also, that way, when you resend the last 8 patches in the patch > series, we can make sure they get a proper review as opposed to making > changes on the fly. Sure, I will resend last 8 patches after I solve the issue. I can also take my time to look at problem in this way :) > The current contents of the dev branch are: > > % git log --reverse --oneline origin..dev > 40fa8be3852f ext4: kill unused function ext4_journalled_write_inline_data > a030569c34be ext4: Change remaining tracepoints to use folio > d1ffc6fb5ded ext4: Make mpage_journal_page_buffers use folio > 5ac99c22fa84 ext4: Make ext4_write_inline_data_end() use folio > d578dfc510cf ext4: Call fsverity_verify_folio() > 30f0bd64ed09 ext4: fix wrong unit use in ext4_mb_normalize_request > b9dc976cc348 ext4: fix unit mismatch in ext4_mb_new_blocks_simple > 9afc5e21107a ext4: fix wrong unit use in ext4_mb_find_by_goal > 860f86ccff6e ext4: treat stripe in block unit > 710c384f1536 ext4: add EXT4_MB_HINT_GOAL_ONLY test in ext4_mb_use_preallocated > f242d8a98a6f ext4: remove ext4_block_group and ext4_block_group_offset declaration > 5b859728b98b ext4: try all groups in ext4_mb_new_blocks_simple > ea7bbd168135 ext4: get block from bh before pass it to 
ext4_free_blocks_simple in ext4_free_blocks > 757d9100a5d1 ext4: remove unsed parameter and unnecessary forward declaration of ext4_mb_new_blocks_simple > 5d62e6da25f5 ext4: fix wrong unit use in ext4_mb_clear_bb > 993d22f0a250 ext4: fix wrong unit use in ext4_mb_new_blocks > bf4f2aa4844a ext4: mballoc: Remove useless setting of ac_criteria > 743f4dd07bf9 ext4: Remove unused extern variables declaration > bc40109767b3 ext4: Convert mballoc cr (criteria) to enum > 52e3814a1342 ext4: Add per CR extent scanned counter > a15c09da1255 ext4: Add counter to track successful allocation of goal length > 26cbe38f0275 ext4: Avoid scanning smaller extents in BG during CR1 > 9c8f8195852c ext4: Don't skip prefetching BLOCK_UNINIT groups > cd303d98b9b5 ext4: Ensure ext4_mb_prefetch_fini() is called for all prefetched BGs > ea639ce794e5 ext4: Abstract out logic to search average fragment list > b080c84db854 ext4: Add allocation criteria 1.5 (CR1_5) > 3a08f7ac3bfa ext4: Give symbolic names to mballoc criterias > d14b5d0b1373 ext4: only update i_reserved_data_blocks on successful block allocation > b352d1f09a20 ext4: add a new helper to check if es must be kept > 579c020ea7b7 ext4: factor out __es_alloc_extent() and __es_free_extent() > f4ddcde91d00 ext4: use pre-allocated es in __es_insert_extent() > e77481862663 ext4: use pre-allocated es in __es_remove_extent() > 28774513875c ext4: using nofail preallocation in ext4_es_remove_extent() > e109a1db5b09 ext4: using nofail preallocation in ext4_es_insert_delayed_block() > 14d876070f03 ext4: using nofail preallocation in ext4_es_insert_extent() > 2af6f615b18b ext4: make ext4_es_remove_extent() return void > 0ee9cccd1971 ext4: make ext4_es_insert_delayed_block() return void > 7a7c285c485d ext4: make ext4_es_insert_extent() return void > 9d1c6dea1aa3 ext4: make ext4_zeroout_es() return void > 2e3f4cdef544 ext4: clean up mballoc criteria comments > acef67482edf ext4: allow concurrent unaligned dio overwrites > 63bc068f0d1a ext4: Fix 
reusing stale buffer heads from last failed mounting > 3a57c2f88be3 ext4: ext4_put_super: Remove redundant checking for 'sbi->s_journal_bdev' > 6b960d2155f9 jbd2: remove unused feature macros > 4b049709e652 jbd2: switch to check format version in superblock directly > d9eafe0afafa jbd2: factor out journal initialization from journal_get_superblock() > 6eecd1f4c7ef jbd2: remove j_format_version > 431ca11fafd3 jbd2: continue to record log between each mount > 2ea31402649c ext4: add journal cycled recording support > a228f0e153f6 ext4: update doc about journal superblock description > f9c45d83f4da ext4: turning quotas off if mount failed after enable quotas > 5404e4738054 ext4: refactoring to use the unified helper ext4_quotas_off() > d3ab1bca26b4 jbd2: recheck chechpointing non-dirty buffer > 7b0cfe40a991 jbd2: remove t_checkpoint_io_list > e86f802ab8d4 jbd2: remove journal_clean_one_cp_list() > e8ece5c78f36 jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint > cdffaad9649e jbd2: fix a race when checking checkpoint buffer busy > 11761ed6026e jbd2: remove __journal_try_to_free_buffer() > > Cheers, > > - Ted > -- Best wishes Kemeng Shi
on 6/13/2023 9:22 AM, Kemeng Shi wrote:
>
>
> on 6/12/2023 11:49 AM, Theodore Ts'o wrote:
>> On Mon, Jun 12, 2023 at 10:24:55AM +0800, Kemeng Shi wrote:
>>
>>> Hi ted, sorry for this issue. This patch added a WARN_ON for case that we free block
>>> to uninitialized block group which should be invalid.
>>> We can simply remove the WARN_ON to allow free on uninitialized block group as old
>>> way for emergency fix and I will find out why we free blocks to uninitialized block
>>> group in fast commit code path and is it a valid behavior.
>>
>> What I've done for now in the dev branch was to drop patches 12
>> through 19 of this patch series. That seemed to be a good break
>> point, and I wanted to make sure we had something working so we can
>> start doing a lot more intesive testing on the patches so far.
>>
>> Also, that way, when you resend the last 8 patches in the patch
>> series, we can make sure they get a proper review as opposed to making
>> changes on the fly.
> Sure, I will resend last 8 patches after I solve the issue. I can also take my time
> to look at problem in this way :)
Update on how the WARN_ON for freeing blocks to an uninitialized block group is triggered
under the fast commit path in test generic/468.
# /sbin/mkfs.ext4 -F -q -O inline_data,fast_commit /dev/vdc
# /bin/mount -t ext4 -o acl,user_xattr -o block_validity /dev/vdc /vdc
# /root/xfstests/bin/xfs_io -i -f -c 'truncate 4202496' -c 'pwrite 0
4202496' -c fsync -c 'falloc 4202496 104857600' /vdc/testfile
The "falloc 4202496 104857600" will trigger block allocation in a
new uninitialized block group for file range "4202496 104857600" as
follows:
ext4_map_blocks
/*
* Allocate blocks from an uninitialized block group. The change that
* marks the group initialized will be recorded in the full journal.
*/
ext4_mb_new_blocks
[...]
/*
* New extents will be tracked in fast commit.
*/
ext4_fc_track_range
/*
* Add new extents of the allocated range to the inode, which still has space
* in ext_inode_hdr
*/
ext4_ext_insert_extent
[...]
/*
* depth is 0 as the inode has space in ext_inode_hdr; this will track
* the inode in fast commit.
*/
ext4_ext_dirty(handle, inode, path + path->p_depth);
ext4_mark_inode_dirty
ext4_mark_iloc_dirty
ext4_fc_track_inode
# /root/xfstests/bin/xfs_io -i -c fsync /vdc/testfile
The fast commit is performed in fsync as follows:
vfs_fsync
ext4_fsync_journal
ext4_fc_commit
ext4_fc_perform_commit
add EXT4_FC_TAG_ADD_RANGE of new extent range
add EXT4_FC_TAG_INODE of changed inode
# /root/xfstests/src/godown /vdc
The journaled change that marks the group initialized is discarded as follows:
ext4_shutdown
jbd2_journal_abort
# /bin/umount /dev/vdc
# /bin/mount -t ext4 -o acl,user_xattr -o block_validity /dev/vdc /vdc
The fast commit is replayed when mounting, and the added WARN_ON is triggered as
follows:
ext4_fc_replay
/*
* replay EXT4_FC_TAG_ADD_RANGE: add the extents containing blocks from
* the uninitialized group back to the inode
*/
ext4_fc_replay_add_range
/*
* replay EXT4_FC_TAG_INODE; this will trigger the WARN_ON
*/
ext4_fc_replay_inode
/*
* mark all blocks in the old inode free; blocks from the uninitialized
* block group are then freed and the WARN_ON fires
*/
ext4_ext_clear_bb
/* update inode with data journaled in fast commit */
[...]
/*
* mark all blocks in the new inode in use, and gdp will be marked
* initialized normally
*/
ext4_fc_record_modified_inode
[...]
ext4_fc_set_bitmaps_and_counters
In this situation, freeing blocks to an uninitialized block group does no harm.
And there may be more harmless situations, so I would like to simply
drop the WARN_ON in the next version.
--
Best wishes
Kemeng Shi
© 2016 - 2025 Red Hat, Inc.