fs/ocfs2/suballoc.c | 14 +++++++++++--- fs/ocfs2/suballoc.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-)
The commit 4eb7b93e0310 ("ocfs2: improve write IO performance when
fragmentation is high") introduced a regression. In the discontiguous
extent allocation case, ocfs2_cluster_group_search() is comparing with
the wrong target length, which causes allocation failure.
Call stack:
ocfs2_mkdir()
ocfs2_reserve_new_inode()
ocfs2_reserve_suballoc_bits()
ocfs2_block_group_alloc()
ocfs2_block_group_alloc_discontig()
__ocfs2_claim_clusters()
ocfs2_claim_suballoc_bits()
ocfs2_search_chain()
ocfs2_cluster_group_search()
Reported-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high")
Signed-off-by: Heming Zhao <heming.zhao@suse.com>
---
fs/ocfs2/suballoc.c | 14 +++++++++++---
fs/ocfs2/suballoc.h | 1 +
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f7b483f0de2a..3dea082f6e91 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
ac, cl);
- if (PTR_ERR(bg_bh) == -ENOSPC)
+ if (PTR_ERR(bg_bh) == -ENOSPC) {
+ ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
bg_bh = ocfs2_block_group_alloc_discontig(handle,
alloc_inode,
ac, cl);
+ }
if (IS_ERR(bg_bh)) {
status = PTR_ERR(bg_bh);
bg_bh = NULL;
@@ -2365,7 +2367,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
- && ac->ac_which != OCFS2_AC_USE_MAIN);
+ && ac->ac_which != OCFS2_AC_USE_MAIN
+ && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
WARN_ON(min_clusters > 1);
@@ -2427,7 +2430,12 @@ int ocfs2_claim_clusters(handle_t *handle,
u32 *cluster_start,
u32 *num_clusters)
{
- unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+ unsigned int bits_wanted;
+
+ if (ac->ac_which == OCFS2_AC_USE_MAIN)
+ bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+ else /* ac_which == OCFS2_AC_USE_MAIN_DISCONTIG */
+ bits_wanted = min_clusters;
return __ocfs2_claim_clusters(handle, ac, min_clusters,
bits_wanted, cluster_start, num_clusters);
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index b481b834857d..bcf2ed4a8631 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -29,6 +29,7 @@ struct ocfs2_alloc_context {
#define OCFS2_AC_USE_MAIN 2
#define OCFS2_AC_USE_INODE 3
#define OCFS2_AC_USE_META 4
+#define OCFS2_AC_USE_MAIN_DISCONTIG 5
u32 ac_which;
/* these are used by the chain search */
--
2.43.0
On 2025/3/27 14:22, Heming Zhao wrote:
> The commit 4eb7b93e0310 ("ocfs2: improve write IO performance when
> fragmentation is high") introduced a regression. In the discontiguous
> extent allocation case, ocfs2_cluster_group_search() is comparing with
> the wrong target length, which causes allocation failure.
>
> Call stack:
> ocfs2_mkdir()
> ocfs2_reserve_new_inode()
> ocfs2_reserve_suballoc_bits()
> ocfs2_block_group_alloc()
> ocfs2_block_group_alloc_discontig()
> __ocfs2_claim_clusters()
> ocfs2_claim_suballoc_bits()
> ocfs2_search_chain()
> ocfs2_cluster_group_search()
>
> Reported-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
> Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high")
> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> ---
> fs/ocfs2/suballoc.c | 14 +++++++++++---
> fs/ocfs2/suballoc.h | 1 +
> 2 files changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> index f7b483f0de2a..3dea082f6e91 100644
> --- a/fs/ocfs2/suballoc.c
> +++ b/fs/ocfs2/suballoc.c
> @@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
>
> bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
> ac, cl);
> - if (PTR_ERR(bg_bh) == -ENOSPC)
> + if (PTR_ERR(bg_bh) == -ENOSPC) {
> + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
> bg_bh = ocfs2_block_group_alloc_discontig(handle,
> alloc_inode,
> ac, cl);
> + }
> if (IS_ERR(bg_bh)) {
> status = PTR_ERR(bg_bh);
> bg_bh = NULL;
> @@ -2365,7 +2367,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
> BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
>
> BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
> - && ac->ac_which != OCFS2_AC_USE_MAIN);
> + && ac->ac_which != OCFS2_AC_USE_MAIN
> + && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
>
> if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
> WARN_ON(min_clusters > 1);
> @@ -2427,7 +2430,12 @@ int ocfs2_claim_clusters(handle_t *handle,
> u32 *cluster_start,
> u32 *num_clusters)
> {
> - unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
> + unsigned int bits_wanted;
> +
> + if (ac->ac_which == OCFS2_AC_USE_MAIN)
> + bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
> + else /* ac_which == OCFS2_AC_USE_MAIN_DISCONTIG */
> + bits_wanted = min_clusters;
This looks wried. Why can not be other alloc type?
Or it seems you intend to:
unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG)
bits_wanted = min_clusters;
Thanks,
Joseph
>
> return __ocfs2_claim_clusters(handle, ac, min_clusters,
> bits_wanted, cluster_start, num_clusters);
> diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
> index b481b834857d..bcf2ed4a8631 100644
> --- a/fs/ocfs2/suballoc.h
> +++ b/fs/ocfs2/suballoc.h
> @@ -29,6 +29,7 @@ struct ocfs2_alloc_context {
> #define OCFS2_AC_USE_MAIN 2
> #define OCFS2_AC_USE_INODE 3
> #define OCFS2_AC_USE_META 4
> +#define OCFS2_AC_USE_MAIN_DISCONTIG 5
> u32 ac_which;
>
> /* these are used by the chain search */
On 4/1/25 19:38, Joseph Qi wrote:
>
>
> On 2025/3/27 14:22, Heming Zhao wrote:
>> The commit 4eb7b93e0310 ("ocfs2: improve write IO performance when
>> fragmentation is high") introduced a regression. In the discontiguous
>> extent allocation case, ocfs2_cluster_group_search() is comparing with
>> the wrong target length, which causes allocation failure.
>>
>> Call stack:
>> ocfs2_mkdir()
>> ocfs2_reserve_new_inode()
>> ocfs2_reserve_suballoc_bits()
>> ocfs2_block_group_alloc()
>> ocfs2_block_group_alloc_discontig()
>> __ocfs2_claim_clusters()
>> ocfs2_claim_suballoc_bits()
>> ocfs2_search_chain()
>> ocfs2_cluster_group_search()
>>
>> Reported-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
>> Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high")
>> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
>> ---
>> fs/ocfs2/suballoc.c | 14 +++++++++++---
>> fs/ocfs2/suballoc.h | 1 +
>> 2 files changed, 12 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
>> index f7b483f0de2a..3dea082f6e91 100644
>> --- a/fs/ocfs2/suballoc.c
>> +++ b/fs/ocfs2/suballoc.c
>> @@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
>>
>> bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
>> ac, cl);
>> - if (PTR_ERR(bg_bh) == -ENOSPC)
>> + if (PTR_ERR(bg_bh) == -ENOSPC) {
>> + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
>> bg_bh = ocfs2_block_group_alloc_discontig(handle,
>> alloc_inode,
>> ac, cl);
>> + }
>> if (IS_ERR(bg_bh)) {
>> status = PTR_ERR(bg_bh);
>> bg_bh = NULL;
>> @@ -2365,7 +2367,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
>> BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
>>
>> BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
>> - && ac->ac_which != OCFS2_AC_USE_MAIN);
>> + && ac->ac_which != OCFS2_AC_USE_MAIN
>> + && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
>>
>> if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
>> WARN_ON(min_clusters > 1);
>> @@ -2427,7 +2430,12 @@ int ocfs2_claim_clusters(handle_t *handle,
>> u32 *cluster_start,
>> u32 *num_clusters)
>> {
>> - unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
>> + unsigned int bits_wanted;
>> +
>> + if (ac->ac_which == OCFS2_AC_USE_MAIN)
>> + bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
>> + else /* ac_which == OCFS2_AC_USE_MAIN_DISCONTIG */
>> + bits_wanted = min_clusters;
>
> This looks wried. Why can not be other alloc type?
>
> Or it seems you intend to:
>
> unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
>
> if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG)
> bits_wanted = min_clusters;
>
> Thanks,
> Joseph
Yes, you are right.
We should use OCFS2_AC_USE_MAIN_DISCONTIG to handle the special discontig case.
Will send v2 patch for this mistake.
- Heming
>
>>
>> return __ocfs2_claim_clusters(handle, ac, min_clusters,
>> bits_wanted, cluster_start, num_clusters);
>> diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
>> index b481b834857d..bcf2ed4a8631 100644
>> --- a/fs/ocfs2/suballoc.h
>> +++ b/fs/ocfs2/suballoc.h
>> @@ -29,6 +29,7 @@ struct ocfs2_alloc_context {
>> #define OCFS2_AC_USE_MAIN 2
>> #define OCFS2_AC_USE_INODE 3
>> #define OCFS2_AC_USE_META 4
>> +#define OCFS2_AC_USE_MAIN_DISCONTIG 5
>> u32 ac_which;
>>
>> /* these are used by the chain search */
>
Hello list,
I wrote this patch based on Gautham's patch code logic. Because I lack a
test script to verify this patch, this patch only passes compilation.
btw, another topic (unrelated to this bug) is that ocfs2-test fails to run
on the latest Linux distributions (e.g., openSUSE Tumbleweed). I am focusing
on fixing this problem, but it requires some time. I have created a personal
repository [1]. Once I finish the verification process for ocfs2-test, I will
submit a PR to the upstream GitHub repository [2] (the suse-py3 branch).
[1]: https://build.opensuse.org/package/show/home:hmzhao:branches:network:ha-clustering:Factory/ocfs2-test
[2]: https://github.com/markfasheh/ocfs2-test
- Heming
On 3/27/25 14:22, Heming Zhao wrote:
> The commit 4eb7b93e0310 ("ocfs2: improve write IO performance when
> fragmentation is high") introduced a regression. In the discontiguous
> extent allocation case, ocfs2_cluster_group_search() is comparing with
> the wrong target length, which causes allocation failure.
>
> Call stack:
> ocfs2_mkdir()
> ocfs2_reserve_new_inode()
> ocfs2_reserve_suballoc_bits()
> ocfs2_block_group_alloc()
> ocfs2_block_group_alloc_discontig()
> __ocfs2_claim_clusters()
> ocfs2_claim_suballoc_bits()
> ocfs2_search_chain()
> ocfs2_cluster_group_search()
>
> Reported-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
> Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high")
> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> ---
> fs/ocfs2/suballoc.c | 14 +++++++++++---
> fs/ocfs2/suballoc.h | 1 +
> 2 files changed, 12 insertions(+), 3 deletions(-)
>
© 2016 - 2025 Red Hat, Inc.