On very fast but small devices, waiting for a transaction commit can be
too long of a wait in order to wake up the cleaner kthread to remove unused
and reclaimable block-groups.
Check every time we're adding back free space to a block group, if we need
to activate the cleaner kthread.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
fs/btrfs/free-space-cache.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d372c7ce0e6b..2d98b9ca0e83 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -30,6 +30,7 @@
#include "file-item.h"
#include "file.h"
#include "super.h"
+#include "zoned.h"
#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG SZ_64K
@@ -2694,6 +2695,7 @@ int __btrfs_add_free_space(struct btrfs_block_group *block_group,
static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
u64 bytenr, u64 size, bool used)
{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_space_info *sinfo = block_group->space_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
u64 offset = bytenr - block_group->start;
@@ -2745,6 +2747,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
btrfs_mark_bg_to_reclaim(block_group);
}
+ if (btrfs_zoned_should_reclaim(fs_info) &&
+ !test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
+ wake_up_process(fs_info->cleaner_kthread);
+
return 0;
}
--
2.43.0
On Mon, Jan 22, 2024 at 02:51:04AM -0800, Johannes Thumshirn wrote:
> On very fast but small devices, waiting for a transaction commit can be
> too long of a wait in order to wake up the cleaner kthread to remove unused
> and reclaimable block-groups.
>
> Check every time we're adding back free space to a block group, if we need
> to activate the cleaner kthread.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
> fs/btrfs/free-space-cache.c | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index d372c7ce0e6b..2d98b9ca0e83 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -30,6 +30,7 @@
> #include "file-item.h"
> #include "file.h"
> #include "super.h"
> +#include "zoned.h"
>
> #define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
> #define MAX_CACHE_BYTES_PER_GIG SZ_64K
> @@ -2694,6 +2695,7 @@ int __btrfs_add_free_space(struct btrfs_block_group *block_group,
> static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
> u64 bytenr, u64 size, bool used)
> {
I thought add_free_space are only called from various error/backout
conditions and then for real from unpin_extent_range, which is also in
the transaction commit.
The normal reclaim/unused decision is made in btrfs_update_block_group
for that reason.
OTOH, I assume you had some repro that was performing poorly and this
patch fixed it so, I am very likely missing something.
Thanks,
Boris
> + struct btrfs_fs_info *fs_info = block_group->fs_info;
> struct btrfs_space_info *sinfo = block_group->space_info;
> struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> u64 offset = bytenr - block_group->start;
> @@ -2745,6 +2747,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
> btrfs_mark_bg_to_reclaim(block_group);
> }
>
> + if (btrfs_zoned_should_reclaim(fs_info) &&
> + !test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
> + wake_up_process(fs_info->cleaner_kthread);
> +
> return 0;
> }
>
>
> --
> 2.43.0
>
On Mon, Jan 22, 2024 at 02:51:04AM -0800, Johannes Thumshirn wrote:
> On very fast but small devices, waiting for a transaction commit can be
> too long of a wait in order to wake up the cleaner kthread to remove unused
> and reclaimable block-groups.
>
> Check every time we're adding back free space to a block group, if we need
> to activate the cleaner kthread.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
> fs/btrfs/free-space-cache.c | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index d372c7ce0e6b..2d98b9ca0e83 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -30,6 +30,7 @@
> #include "file-item.h"
> #include "file.h"
> #include "super.h"
> +#include "zoned.h"
>
> #define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
> #define MAX_CACHE_BYTES_PER_GIG SZ_64K
> @@ -2694,6 +2695,7 @@ int __btrfs_add_free_space(struct btrfs_block_group *block_group,
> static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
> u64 bytenr, u64 size, bool used)
> {
> + struct btrfs_fs_info *fs_info = block_group->fs_info;
> struct btrfs_space_info *sinfo = block_group->space_info;
> struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> u64 offset = bytenr - block_group->start;
> @@ -2745,6 +2747,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
> btrfs_mark_bg_to_reclaim(block_group);
> }
>
> + if (btrfs_zoned_should_reclaim(fs_info) &&
> + !test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
> + wake_up_process(fs_info->cleaner_kthread);
> +
Isn't it too costly to call btrfs_zoned_should_reclaim() every time
something updated? Can we wake up it in btrfs_mark_bg_to_reclaim and
btrfs_mark_bg_unused ?
Also, looking into btrfs_zoned_should_reclaim(), it sums device->bytes_used
for each fs_devices->devices. And, device->bytes_used is set at
create_chunk() or at btrfs_remove_chunk(). Isn't it feasible to do the
calculation only there?
> return 0;
> }
>
>
> --
> 2.43.0
>
On 22.01.24 13:22, Naohiro Aota wrote:
> On Mon, Jan 22, 2024 at 02:51:04AM -0800, Johannes Thumshirn wrote:
>> On very fast but small devices, waiting for a transaction commit can be
>> too long of a wait in order to wake up the cleaner kthread to remove unused
>> and reclaimable block-groups.
>>
>> Check every time we're adding back free space to a block group, if we need
>> to activate the cleaner kthread.
>>
>> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>> ---
>> fs/btrfs/free-space-cache.c | 6 ++++++
>> 1 file changed, 6 insertions(+)
>>
>> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
>> index d372c7ce0e6b..2d98b9ca0e83 100644
>> --- a/fs/btrfs/free-space-cache.c
>> +++ b/fs/btrfs/free-space-cache.c
>> @@ -30,6 +30,7 @@
>> #include "file-item.h"
>> #include "file.h"
>> #include "super.h"
>> +#include "zoned.h"
>>
>> #define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
>> #define MAX_CACHE_BYTES_PER_GIG SZ_64K
>> @@ -2694,6 +2695,7 @@ int __btrfs_add_free_space(struct btrfs_block_group *block_group,
>> static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
>> u64 bytenr, u64 size, bool used)
>> {
>> + struct btrfs_fs_info *fs_info = block_group->fs_info;
>> struct btrfs_space_info *sinfo = block_group->space_info;
>> struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
>> u64 offset = bytenr - block_group->start;
>> @@ -2745,6 +2747,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
>> btrfs_mark_bg_to_reclaim(block_group);
>> }
>>
>> + if (btrfs_zoned_should_reclaim(fs_info) &&
>> + !test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
>> + wake_up_process(fs_info->cleaner_kthread);
>> +
>
> Isn't it too costly to call btrfs_zoned_should_reclaim() every time
> something updated? Can we wake up it in btrfs_mark_bg_to_reclaim and
> btrfs_mark_bg_unused ?
Hmm yes, I've thought of adding a list_count() for the reclaim and
unused lists, and only calling into should_reclaim if these lists have
entries. Or even better !list_is_singular().
>
> Also, looking into btrfs_zoned_should_reclaim(), it sums device->bytes_used
> for each fs_devices->devices. And, device->bytes_used is set at
> create_chunk() or at btrfs_remove_chunk(). Isn't it feasible to do the
> calculation only there?
Oh sh*t! Right we should check bytes_used from all space_infos in
btrfs_zoned_should_reclaim() and compare that to the disk total bytes.
On Mon, Jan 22, 2024 at 12:30:52PM +0000, Johannes Thumshirn wrote:
> On 22.01.24 13:22, Naohiro Aota wrote:
> > On Mon, Jan 22, 2024 at 02:51:04AM -0800, Johannes Thumshirn wrote:
> >> On very fast but small devices, waiting for a transaction commit can be
> >> too long of a wait in order to wake up the cleaner kthread to remove unused
> >> and reclaimable block-groups.
> >>
> >> Check every time we're adding back free space to a block group, if we need
> >> to activate the cleaner kthread.
> >>
> >> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> >> ---
> >> fs/btrfs/free-space-cache.c | 6 ++++++
> >> 1 file changed, 6 insertions(+)
> >>
> >> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> >> index d372c7ce0e6b..2d98b9ca0e83 100644
> >> --- a/fs/btrfs/free-space-cache.c
> >> +++ b/fs/btrfs/free-space-cache.c
> >> @@ -30,6 +30,7 @@
> >> #include "file-item.h"
> >> #include "file.h"
> >> #include "super.h"
> >> +#include "zoned.h"
> >>
> >> #define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
> >> #define MAX_CACHE_BYTES_PER_GIG SZ_64K
> >> @@ -2694,6 +2695,7 @@ int __btrfs_add_free_space(struct btrfs_block_group *block_group,
> >> static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
> >> u64 bytenr, u64 size, bool used)
> >> {
> >> + struct btrfs_fs_info *fs_info = block_group->fs_info;
> >> struct btrfs_space_info *sinfo = block_group->space_info;
> >> struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> >> u64 offset = bytenr - block_group->start;
> >> @@ -2745,6 +2747,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
> >> btrfs_mark_bg_to_reclaim(block_group);
> >> }
> >>
> >> + if (btrfs_zoned_should_reclaim(fs_info) &&
> >> + !test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
> >> + wake_up_process(fs_info->cleaner_kthread);
> >> +
> >
> > Isn't it too costly to call btrfs_zoned_should_reclaim() every time
> > something updated? Can we wake up it in btrfs_mark_bg_to_reclaim and
> > btrfs_mark_bg_unused ?
>
> Hmm yes, I've thought of adding a list_count() for the reclaim and
> unused lists, and only calling into should_reclaim if these lists have
> entries. Or even better !list_is_singular().
That sounds reasonable.
> >
> > Also, looking into btrfs_zoned_should_reclaim(), it sums device->bytes_used
> > for each fs_devices->devices. And, device->bytes_used is set at
> > create_chunk() or at btrfs_remove_chunk(). Isn't it feasible to do the
> > calculation only there?
>
> Oh sh*t! Right we should check bytes_used from all space_infos in
> btrfs_zoned_should_reclaim() and compare that to the disk total bytes.
You mean device->bytes_used? space_info->bytes_used does not count free
space and zone_unusable in BGs, so using that changes the behavior. Even,
it won't kick the thread if there are many zone_unusable but small used
space.
>
On 22.01.24 15:39, Naohiro Aota wrote:
>>>
>>> Also, looking into btrfs_zoned_should_reclaim(), it sums device->bytes_used
>>> for each fs_devices->devices. And, device->bytes_used is set at
>>> create_chunk() or at btrfs_remove_chunk(). Isn't it feasible to do the
>>> calculation only there?
>>
>> Oh sh*t! Right we should check bytes_used from all space_infos in
>> btrfs_zoned_should_reclaim() and compare that to the disk total bytes.
>
> You mean device->bytes_used? space_info->bytes_used does not count free
> space and zone_unusable in BGs, so using that changes the behavior. Even,
> it won't kick the thread if there are many zone_unusable but small used
> space.
>
I did mean btrfs_space_info_used():
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index b7e7b5a5a6fa..d5242c96c97c 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2414,6 +2414,7 @@ bool btrfs_zoned_should_reclaim(struct
btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
+ struct btrfs_space_info *space_info;
u64 used = 0;
u64 total = 0;
u64 factor;
@@ -2429,10 +2430,15 @@ bool btrfs_zoned_should_reclaim(struct
btrfs_fs_info *fs_info)
continue;
total += device->disk_total_bytes;
- used += device->bytes_used;
}
rcu_read_unlock();
+ list_for_each_entry(space_info, &fs_info->space_info, list) {
+ spin_lock(&space_info->lock);
+ used += btrfs_space_info_used(space_info, true);
+ spin_unlock(&space_info->lock);
+ }
+
factor = div64_u64(used * 100, total);
return factor >= fs_info->bg_reclaim_threshold;
}
On Mon, Jan 22, 2024 at 02:43:03PM +0000, Johannes Thumshirn wrote:
> On 22.01.24 15:39, Naohiro Aota wrote:
> >>>
> >>> Also, looking into btrfs_zoned_should_reclaim(), it sums device->bytes_used
> >>> for each fs_devices->devices. And, device->bytes_used is set at
> >>> create_chunk() or at btrfs_remove_chunk(). Isn't it feasible to do the
> >>> calculation only there?
> >>
> >> Oh sh*t! Right we should check bytes_used from all space_infos in
> >> btrfs_zoned_should_reclaim() and compare that to the disk total bytes.
> >
> > You mean device->bytes_used? space_info->bytes_used does not count free
> > space and zone_unusable in BGs, so using that changes the behavior. Even,
> > it won't kick the thread if there are many zone_unusable but small used
> > space.
> >
>
> I did mean btrfs_space_info_used():
>
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index b7e7b5a5a6fa..d5242c96c97c 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -2414,6 +2414,7 @@ bool btrfs_zoned_should_reclaim(struct
> btrfs_fs_info *fs_info)
> {
> struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> struct btrfs_device *device;
> + struct btrfs_space_info *space_info;
> u64 used = 0;
> u64 total = 0;
> u64 factor;
> @@ -2429,10 +2430,15 @@ bool btrfs_zoned_should_reclaim(struct
> btrfs_fs_info *fs_info)
> continue;
>
> total += device->disk_total_bytes;
> - used += device->bytes_used;
> }
> rcu_read_unlock();
>
> + list_for_each_entry(space_info, &fs_info->space_info, list) {
> + spin_lock(&space_info->lock);
> + used += btrfs_space_info_used(space_info, true);
> + spin_unlock(&space_info->lock);
> + }
> +
> factor = div64_u64(used * 100, total);
> return factor >= fs_info->bg_reclaim_threshold;
> }
>
This changes the behavior. btrfs_space_info_used() excludes unallocated
space.
Also, if we calculate it with device->disk_total_bytes, it screws up on
DUP/RAID* profile because btrfs_space_info_used() counts the logical space
vs disk_total_bytes counts the physical space.
© 2016 - 2025 Red Hat, Inc.