drivers/md/bcache/super.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-)
Signed-off-by: cheliequan <cheliequan@inspur.com>
If the bcache cache disk contains damaged btree data,
when the bcache cache disk partition is directly operated,
the systemd-udevd service is triggered to call the bcache-register
program to register the bcache device, resulting in a kernel oops.
crash> bt
PID: 7773 TASK: ffff49cc44d69340 CPU: 57 COMMAND: "kworker/57:2"
#0 [ffff800046373800] machine_kexec at ffffbe5039eb54a8
#1 [ffff8000463739b0] __crash_kexec at ffffbe503a052824
#2 [ffff8000463739e0] crash_kexec at ffffbe503a0529cc
#3 [ffff800046373a60] die at ffffbe5039e9445c
#4 [ffff800046373ac0] die_kernel_fault at ffffbe5039ec698c
#5 [ffff800046373af0] __do_kernel_fault at ffffbe5039ec6a38
#6 [ffff800046373b20] do_page_fault at ffffbe503ac76ba4
#7 [ffff800046373b70] do_translation_fault at ffffbe503ac76ebc
#8 [ffff800046373b90] do_mem_abort at ffffbe5039ec68ac
#9 [ffff800046373bc0] el1_abort at ffffbe503ac669bc
#10 [ffff800046373bf0] el1_sync_handler at ffffbe503ac671d4
#11 [ffff800046373d30] el1_sync at ffffbe5039e82230
#12 [ffff800046373d50] cache_set_flush at ffffbe50121fa4c4 [bcache]
#13 [ffff800046373da0] process_one_work at ffffbe5039f5af68
#14 [ffff800046373e00] worker_thread at ffffbe5039f5b3c4
#15 [ffff800046373e50] kthread at ffffbe5039f634b8
crash> dis cache_set_flush+0x94
0xffffbe50121fa4c8 <cache_set_flush+148>: str x23, [x20, #512]
---
drivers/md/bcache/super.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index fd97730479d8..8a41dfcf9fb6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1741,8 +1741,10 @@ static void cache_set_flush(struct closure *cl)
if (!IS_ERR_OR_NULL(c->gc_thread))
kthread_stop(c->gc_thread);
- if (!IS_ERR(c->root))
- list_add(&c->root->list, &c->btree_cache);
+ if (!IS_ERR_OR_NULL(c->root)) {
+ if (!list_empty(&c->root->list))
+ list_add(&c->root->list, &c->btree_cache);
+ }
/*
* Avoid flushing cached nodes if cache set is retiring
@@ -1750,10 +1752,12 @@ static void cache_set_flush(struct closure *cl)
*/
if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags))
list_for_each_entry(b, &c->btree_cache, list) {
- mutex_lock(&b->write_lock);
- if (btree_node_dirty(b))
- __bch_btree_node_write(b, NULL);
- mutex_unlock(&b->write_lock);
+ if (!IS_ERR_OR_NULL(b)) {
+ mutex_lock(&b->write_lock);
+ if (btree_node_dirty(b))
+ __bch_btree_node_write(b, NULL);
+ mutex_unlock(&b->write_lock);
+ }
}
if (ca->alloc_thread)
--
2.33.0
Hi Liequan, > 2024年11月13日 14:25,liequan che <liequanche@gmail.com> 写道: > > Signed-off-by: cheliequan <cheliequan@inspur.com> > > If the bcache cache disk contains damaged btree data, > when the bcache cache disk partition is directly operated, > the system-udevd service is triggered to call the bcache-register > program to register the bcache device,resulting in kernel oops. > What is the kernel version ? Interesting that why the btree node checking code during registration doesn’t cache the meta data error. > crash> bt > PID: 7773 TASK: ffff49cc44d69340 CPU: 57 COMMAND: "kworker/57:2" > #0 [ffff800046373800] machine_kexec at ffffbe5039eb54a8 > #1 [ffff8000463739b0] __crash_kexec at ffffbe503a052824 > #2 [ffff8000463739e0] crash_kexec at ffffbe503a0529cc > #3 [ffff800046373a60] die at ffffbe5039e9445c > #4 [ffff800046373ac0] die_kernel_fault at ffffbe5039ec698c > #5 [ffff800046373af0] __do_kernel_fault at ffffbe5039ec6a38 > #6 [ffff800046373b20] do_page_fault at ffffbe503ac76ba4 > #7 [ffff800046373b70] do_translation_fault at ffffbe503ac76ebc > #8 [ffff800046373b90] do_mem_abort at ffffbe5039ec68ac > #9 [ffff800046373bc0] el1_abort at ffffbe503ac669bc > #10 [ffff800046373bf0] el1_sync_handler at ffffbe503ac671d4 > #11 [ffff800046373d30] el1_sync at ffffbe5039e82230 > #12 [ffff800046373d50] cache_set_flush at ffffbe50121fa4c4 [bcache] > #13 [ffff800046373da0] process_one_work at ffffbe5039f5af68 > #14 [ffff800046373e00] worker_thread at ffffbe5039f5b3c4 > #15 [ffff800046373e50] kthread at ffffbe5039f634b8 > crash> dis cache_set_flush+0x94 > 0xffffbe50121fa4c8 <cache_set_flush+148>: str x23, [x20, #512] > > --- > drivers/md/bcache/super.c | 16 ++++++++++------ > 1 file changed, 10 insertions(+), 6 deletions(-) > diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c > index fd97730479d8..8a41dfcf9fb6 100644 > --- a/drivers/md/bcache/super.c > +++ b/drivers/md/bcache/super.c > @@ -1741,8 +1741,10 @@ static void cache_set_flush(struct closure *cl) > if 
(!IS_ERR_OR_NULL(c->gc_thread)) > kthread_stop(c->gc_thread); > > - if (!IS_ERR(c->root)) > - list_add(&c->root->list, &c->btree_cache); > + if (!IS_ERR_OR_NULL(c->root)) { > + if (!list_empty(&c->root->list)) > + list_add(&c->root->list, &c->btree_cache); > + } > > /* > * Avoid flushing cached nodes if cache set is retiring > @@ -1750,10 +1752,12 @@ static void cache_set_flush(struct closure *cl) > */ > if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags)) > list_for_each_entry(b, &c->btree_cache, list) { > - mutex_lock(&b->write_lock); > - if (btree_node_dirty(b)) > - __bch_btree_node_write(b, NULL); > - mutex_unlock(&b->write_lock); > + if (!IS_ERR_OR_NULL(b)) { The above check is not safe. > + mutex_lock(&b->write_lock); > + if (btree_node_dirty(b)) > + __bch_btree_node_write(b, NULL); > + mutex_unlock(&b->write_lock); > + } > } > > if (ca->alloc_thread) > -- > 2.33.0 Thanks. Coly Li
I tested this bug on kernel versions 5.10.0-231.0.0.133 and 5.10.0-202.0.0.115。 You can get detailed information through the link below. https://gitee.com/openeuler/kernel/issues/IB3YQZ https://gitee.com/openeuler/kernel/pulls/13205 Best Regards! cheleiquan Coly Li <colyli@suse.de> 于2024年11月13日周三 15:04写道: > > Hi Liequan, > > > 2024年11月13日 14:25,liequan che <liequanche@gmail.com> 写道: > > > > Signed-off-by: cheliequan <cheliequan@inspur.com> > > > > If the bcache cache disk contains damaged btree data, > > when the bcache cache disk partition is directly operated, > > the system-udevd service is triggered to call the bcache-register > > program to register the bcache device,resulting in kernel oops. > > > > What is the kernel version ? > > Interesting that why the btree node checking code during registration doesn’t cache the meta data error. > > > > > crash> bt > > PID: 7773 TASK: ffff49cc44d69340 CPU: 57 COMMAND: "kworker/57:2" > > #0 [ffff800046373800] machine_kexec at ffffbe5039eb54a8 > > #1 [ffff8000463739b0] __crash_kexec at ffffbe503a052824 > > #2 [ffff8000463739e0] crash_kexec at ffffbe503a0529cc > > #3 [ffff800046373a60] die at ffffbe5039e9445c > > #4 [ffff800046373ac0] die_kernel_fault at ffffbe5039ec698c > > #5 [ffff800046373af0] __do_kernel_fault at ffffbe5039ec6a38 > > #6 [ffff800046373b20] do_page_fault at ffffbe503ac76ba4 > > #7 [ffff800046373b70] do_translation_fault at ffffbe503ac76ebc > > #8 [ffff800046373b90] do_mem_abort at ffffbe5039ec68ac > > #9 [ffff800046373bc0] el1_abort at ffffbe503ac669bc > > #10 [ffff800046373bf0] el1_sync_handler at ffffbe503ac671d4 > > #11 [ffff800046373d30] el1_sync at ffffbe5039e82230 > > #12 [ffff800046373d50] cache_set_flush at ffffbe50121fa4c4 [bcache] > > #13 [ffff800046373da0] process_one_work at ffffbe5039f5af68 > > #14 [ffff800046373e00] worker_thread at ffffbe5039f5b3c4 > > #15 [ffff800046373e50] kthread at ffffbe5039f634b8 > > crash> dis cache_set_flush+0x94 > > 0xffffbe50121fa4c8 <cache_set_flush+148>: str 
x23, [x20, #512] > > > > --- > > drivers/md/bcache/super.c | 16 ++++++++++------ > > 1 file changed, 10 insertions(+), 6 deletions(-) > > diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c > > index fd97730479d8..8a41dfcf9fb6 100644 > > --- a/drivers/md/bcache/super.c > > +++ b/drivers/md/bcache/super.c > > @@ -1741,8 +1741,10 @@ static void cache_set_flush(struct closure *cl) > > if (!IS_ERR_OR_NULL(c->gc_thread)) > > kthread_stop(c->gc_thread); > > > > - if (!IS_ERR(c->root)) > > - list_add(&c->root->list, &c->btree_cache); > > + if (!IS_ERR_OR_NULL(c->root)) { > > + if (!list_empty(&c->root->list)) > > + list_add(&c->root->list, &c->btree_cache); > > + } > > > > /* > > * Avoid flushing cached nodes if cache set is retiring > > @@ -1750,10 +1752,12 @@ static void cache_set_flush(struct closure *cl) > > */ > > if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags)) > > list_for_each_entry(b, &c->btree_cache, list) { > > - mutex_lock(&b->write_lock); > > - if (btree_node_dirty(b)) > > - __bch_btree_node_write(b, NULL); > > - mutex_unlock(&b->write_lock); > > + if (!IS_ERR_OR_NULL(b)) { > > The above check is not safe. > > > > > + mutex_lock(&b->write_lock); > > + if (btree_node_dirty(b)) > > + __bch_btree_node_write(b, NULL); > > + mutex_unlock(&b->write_lock); > > + } > > } > > > > if (ca->alloc_thread) > > -- > > 2.33.0 > > > Thanks. > > Coly Li
Hi Coly: In addition, the following steps trigger the kernel oops. After creating the bcache device, the metadata was not cleared. The NVMe disk was then hot-plugged into another server and repartitioned with a different partition size to recreate the bcache device. As soon as the partitioning was executed, the kernel oops was triggered. After I applied the new patch, the kernel panic was no longer triggered. For the above problem, can you suggest a better fix? Thanks! cheliequan liequan che <liequanche@gmail.com> 于2024年11月13日周三 15:40写道: > > I tested this bug on kernel versions 5.10.0-231.0.0.133 and 5.10.0-202.0.0.115。 > You can get detailed information through the link below. > > https://gitee.com/openeuler/kernel/issues/IB3YQZ > https://gitee.com/openeuler/kernel/pulls/13205 > Best Regards! > cheleiquan > > Coly Li <colyli@suse.de> 于2024年11月13日周三 15:04写道: > > > > Hi Liequan, > > > > > 2024年11月13日 14:25,liequan che <liequanche@gmail.com> 写道: > > > > > > Signed-off-by: cheliequan <cheliequan@inspur.com> > > > > > > If the bcache cache disk contains damaged btree data, > > > when the bcache cache disk partition is directly operated, > > > the system-udevd service is triggered to call the bcache-register > > > program to register the bcache device,resulting in kernel oops. > > > > > > > What is the kernel version ? > > > > Interesting that why the btree node checking code during registration doesn’t cache the meta data error. 
> > > > > > > > > crash> bt > > > PID: 7773 TASK: ffff49cc44d69340 CPU: 57 COMMAND: "kworker/57:2" > > > #0 [ffff800046373800] machine_kexec at ffffbe5039eb54a8 > > > #1 [ffff8000463739b0] __crash_kexec at ffffbe503a052824 > > > #2 [ffff8000463739e0] crash_kexec at ffffbe503a0529cc > > > #3 [ffff800046373a60] die at ffffbe5039e9445c > > > #4 [ffff800046373ac0] die_kernel_fault at ffffbe5039ec698c > > > #5 [ffff800046373af0] __do_kernel_fault at ffffbe5039ec6a38 > > > #6 [ffff800046373b20] do_page_fault at ffffbe503ac76ba4 > > > #7 [ffff800046373b70] do_translation_fault at ffffbe503ac76ebc > > > #8 [ffff800046373b90] do_mem_abort at ffffbe5039ec68ac > > > #9 [ffff800046373bc0] el1_abort at ffffbe503ac669bc > > > #10 [ffff800046373bf0] el1_sync_handler at ffffbe503ac671d4 > > > #11 [ffff800046373d30] el1_sync at ffffbe5039e82230 > > > #12 [ffff800046373d50] cache_set_flush at ffffbe50121fa4c4 [bcache] > > > #13 [ffff800046373da0] process_one_work at ffffbe5039f5af68 > > > #14 [ffff800046373e00] worker_thread at ffffbe5039f5b3c4 > > > #15 [ffff800046373e50] kthread at ffffbe5039f634b8 > > > crash> dis cache_set_flush+0x94 > > > 0xffffbe50121fa4c8 <cache_set_flush+148>: str x23, [x20, #512] > > > > > > --- > > > drivers/md/bcache/super.c | 16 ++++++++++------ > > > 1 file changed, 10 insertions(+), 6 deletions(-) > > > diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c > > > index fd97730479d8..8a41dfcf9fb6 100644 > > > --- a/drivers/md/bcache/super.c > > > +++ b/drivers/md/bcache/super.c > > > @@ -1741,8 +1741,10 @@ static void cache_set_flush(struct closure *cl) > > > if (!IS_ERR_OR_NULL(c->gc_thread)) > > > kthread_stop(c->gc_thread); > > > > > > - if (!IS_ERR(c->root)) > > > - list_add(&c->root->list, &c->btree_cache); > > > + if (!IS_ERR_OR_NULL(c->root)) { > > > + if (!list_empty(&c->root->list)) > > > + list_add(&c->root->list, &c->btree_cache); > > > + } > > > > > > /* > > > * Avoid flushing cached nodes if cache set is retiring > > > @@ 
-1750,10 +1752,12 @@ static void cache_set_flush(struct closure *cl) > > > */ > > > if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags)) > > > list_for_each_entry(b, &c->btree_cache, list) { > > > - mutex_lock(&b->write_lock); > > > - if (btree_node_dirty(b)) > > > - __bch_btree_node_write(b, NULL); > > > - mutex_unlock(&b->write_lock); > > > + if (!IS_ERR_OR_NULL(b)) { > > > > The above check is not safe. > > > > > > > > > + mutex_lock(&b->write_lock); > > > + if (btree_node_dirty(b)) > > > + __bch_btree_node_write(b, NULL); > > > + mutex_unlock(&b->write_lock); > > > + } > > > } > > > > > > if (ca->alloc_thread) > > > -- > > > 2.33.0 > > > > > > Thanks. > > > > Coly Li
© 2016 - 2024 Red Hat, Inc.