After ocfs2 gained the ability to reclaim free suballocator block
groups (BGs), a suballocator block group may be released. This change
causes the xfstest case generic/426 to fail.
generic/426 expects return value -ENOENT or -ESTALE, but the current
code triggers -EROFS.
Call stack before ocfs2 gained the ability to reclaim bg:
ocfs2_fh_to_dentry //or ocfs2_fh_to_parent
ocfs2_get_dentry
+ ocfs2_test_inode_bit
| ocfs2_test_suballoc_bit
| + ocfs2_read_group_descriptor //Since ocfs2 never releases the bg,
| | //the bg block was always found.
| + *res = ocfs2_test_bit //unlink was called, and the bit is zero
|
+ if (!set) //because the above *res is 0
status = -ESTALE //the generic/426 expected return value
Current call stack that triggers -EROFS:
ocfs2_get_dentry
ocfs2_test_inode_bit
ocfs2_test_suballoc_bit
ocfs2_read_group_descriptor
+ if reading a released bg, validation fails and triggers -EROFS
How to fix:
Since the BG being read has already been released, we must avoid
triggering -EROFS.
With this commit, we use ocfs2_read_hint_group_descriptor() to detect
the released BG block. This approach quietly handles this type of error
and returns -ESTALE directly; the callers (ocfs2_test_inode_bit(),
ocfs2_get_dentry() and ocfs2_get_parent()) pass -ESTALE through without
logging it as an error.
Signed-off-by: Heming Zhao <heming.zhao@suse.com>
Reviewed-by: Su Yue <glass.su@suse.com>
---
fs/ocfs2/export.c | 6 ++++--
fs/ocfs2/suballoc.c | 28 ++++++++++++++++++----------
2 files changed, 22 insertions(+), 12 deletions(-)
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index b95724b767e1..9c2665dd24e2 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -74,8 +74,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
* nice
*/
status = -ESTALE;
- } else
+ } else if (status != -ESTALE) {
mlog(ML_ERROR, "test inode bit failed %d\n", status);
+ }
goto unlock_nfs_sync;
}
@@ -162,8 +163,9 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
if (status < 0) {
if (status == -EINVAL) {
status = -ESTALE;
- } else
+ } else if (status != -ESTALE) {
mlog(ML_ERROR, "test inode bit failed %d\n", status);
+ }
parent = ERR_PTR(status);
goto bail_unlock;
}
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 9a19f5230c8c..ddcfa6e001e8 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -3152,7 +3152,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
struct ocfs2_group_desc *group;
struct buffer_head *group_bh = NULL;
u64 bg_blkno;
- int status;
+ int status, quiet = 0, released;
trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
(unsigned int)bit);
@@ -3168,11 +3168,15 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
bg_blkno = group_blkno ? group_blkno :
ocfs2_which_suballoc_group(blkno, bit);
- status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
- &group_bh);
- if (status < 0) {
+ status = ocfs2_read_hint_group_descriptor(suballoc, alloc_di, bg_blkno,
+ &group_bh, &released);
+ if (released) {
+ quiet = 1;
+ status = -ESTALE;
+ goto bail;
+ } else if (status < 0) {
mlog(ML_ERROR, "read group %llu failed %d\n",
- (unsigned long long)bg_blkno, status);
+ (unsigned long long)bg_blkno, status);
goto bail;
}
@@ -3182,7 +3186,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
bail:
brelse(group_bh);
- if (status)
+ if (status && (!quiet))
mlog_errno(status);
return status;
}
@@ -3202,7 +3206,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
*/
int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
{
- int status;
+ int status, quiet = 0;
u64 group_blkno = 0;
u16 suballoc_bit = 0, suballoc_slot = 0;
struct inode *inode_alloc_inode;
@@ -3244,8 +3248,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
group_blkno, blkno, suballoc_bit, res);
- if (status < 0)
- mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
+ if (status < 0) {
+ if (status == -ESTALE)
+ quiet = 1;
+ else
+ mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
+ }
ocfs2_inode_unlock(inode_alloc_inode, 0);
inode_unlock(inode_alloc_inode);
@@ -3253,7 +3261,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
iput(inode_alloc_inode);
brelse(alloc_bh);
bail:
- if (status)
+ if (status && !quiet)
mlog_errno(status);
return status;
}
--
2.43.0
On 2025/12/12 15:00, Heming Zhao wrote:
> After ocfs2 gained the ability to reclaim suballocator free block
> group (BGs), a suballocator block group may be released. This change
> causes the xfstest case generic/426 to fail.
>
> generic/426 expects return value -ENOENT or -ESTALE, but the current
> code triggers -EROFS.
>
> Call stack before ocfs2 gained the ability to reclaim bg:
>
> ocfs2_fh_to_dentry //or ocfs2_fh_to_parent
> ocfs2_get_dentry
> + ocfs2_test_inode_bit
> | ocfs2_test_suballoc_bit
> | + ocfs2_read_group_descriptor //Since ocfs2 never releases the bg,
> | | //the bg block was always found.
> | + *res = ocfs2_test_bit //unlink was called, and the bit is zero
> |
> + if (!set) //because the above *res is 0
> status = -ESTALE //the generic/426 expected return value
>
> Current call stack that triggers -EROFS:
>
> ocfs2_get_dentry
> ocfs2_test_inode_bit
> ocfs2_test_suballoc_bit
> ocfs2_read_group_descriptor
> + if reading a released bg, validation fails and triggers -EROFS
>
> How to fix:
> Since the read BG is already released, we must avoid triggering -EROFS.
> With this commit, we use ocfs2_read_hint_group_descriptor() to detect
> the released BG block. This approach quietly handles this type of error
> and returns -EINVAL, which triggers the caller's existing conversion
> path to -ESTALE.
>
> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> Reviewed-by: Su Yue <glass.su@suse.com>
> ---
> fs/ocfs2/export.c | 6 ++++--
> fs/ocfs2/suballoc.c | 28 ++++++++++++++++++----------
> 2 files changed, 22 insertions(+), 12 deletions(-)
>
> diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
> index b95724b767e1..9c2665dd24e2 100644
> --- a/fs/ocfs2/export.c
> +++ b/fs/ocfs2/export.c
> @@ -74,8 +74,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
> * nice
> */
> status = -ESTALE;
> - } else
> + } else if (status != -ESTALE) {
> mlog(ML_ERROR, "test inode bit failed %d\n", status);
> + }
> goto unlock_nfs_sync;
> }
>
> @@ -162,8 +163,9 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
> if (status < 0) {
> if (status == -EINVAL) {
> status = -ESTALE;
> - } else
> + } else if (status != -ESTALE) {
> mlog(ML_ERROR, "test inode bit failed %d\n", status);
> + }
> parent = ERR_PTR(status);
> goto bail_unlock;
> }
> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> index 9a19f5230c8c..ddcfa6e001e8 100644
> --- a/fs/ocfs2/suballoc.c
> +++ b/fs/ocfs2/suballoc.c
> @@ -3152,7 +3152,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> struct ocfs2_group_desc *group;
> struct buffer_head *group_bh = NULL;
> u64 bg_blkno;
> - int status;
> + int status, quiet = 0, released;
>
> trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
> (unsigned int)bit);
> @@ -3168,11 +3168,15 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
>
> bg_blkno = group_blkno ? group_blkno :
> ocfs2_which_suballoc_group(blkno, bit);
> - status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
> - &group_bh);
> - if (status < 0) {
> + status = ocfs2_read_hint_group_descriptor(suballoc, alloc_di, bg_blkno,
> + &group_bh, &released);
> + if (released) {
> + quiet = 1;
> + status = -ESTALE;
> + goto bail;
> + } else if (status < 0) {
> mlog(ML_ERROR, "read group %llu failed %d\n",
> - (unsigned long long)bg_blkno, status);
> + (unsigned long long)bg_blkno, status);
This can be kept untouched.
> goto bail;
> }
>
> @@ -3182,7 +3186,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> bail:
> brelse(group_bh);
>
> - if (status)
> + if (status && (!quiet))
'!quiet' is enough; the parentheses are unneeded.
Otherwise, looks good to me.
Thanks,
Joseph
> mlog_errno(status);
> return status;
> }
> @@ -3202,7 +3206,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> */
> int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
> {
> - int status;
> + int status, quiet = 0;
> u64 group_blkno = 0;
> u16 suballoc_bit = 0, suballoc_slot = 0;
> struct inode *inode_alloc_inode;
> @@ -3244,8 +3248,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
>
> status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
> group_blkno, blkno, suballoc_bit, res);
> - if (status < 0)
> - mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
> + if (status < 0) {
> + if (status == -ESTALE)
> + quiet = 1;
> + else
> + mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
> + }
>
> ocfs2_inode_unlock(inode_alloc_inode, 0);
> inode_unlock(inode_alloc_inode);
> @@ -3253,7 +3261,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
> iput(inode_alloc_inode);
> brelse(alloc_bh);
> bail:
> - if (status)
> + if (status && !quiet)
> mlog_errno(status);
> return status;
> }
On Fri, Dec 12, 2025 at 03:12:53PM +0800, Joseph Qi wrote:
>
>
> On 2025/12/12 15:00, Heming Zhao wrote:
> > After ocfs2 gained the ability to reclaim suballocator free block
> > group (BGs), a suballocator block group may be released. This change
> > causes the xfstest case generic/426 to fail.
> >
> > generic/426 expects return value -ENOENT or -ESTALE, but the current
> > code triggers -EROFS.
> >
> > Call stack before ocfs2 gained the ability to reclaim bg:
> >
> > ocfs2_fh_to_dentry //or ocfs2_fh_to_parent
> > ocfs2_get_dentry
> > + ocfs2_test_inode_bit
> > | ocfs2_test_suballoc_bit
> > | + ocfs2_read_group_descriptor //Since ocfs2 never releases the bg,
> > | | //the bg block was always found.
> > | + *res = ocfs2_test_bit //unlink was called, and the bit is zero
> > |
> > + if (!set) //because the above *res is 0
> > status = -ESTALE //the generic/426 expected return value
> >
> > Current call stack that triggers -EROFS:
> >
> > ocfs2_get_dentry
> > ocfs2_test_inode_bit
> > ocfs2_test_suballoc_bit
> > ocfs2_read_group_descriptor
> > + if reading a released bg, validation fails and triggers -EROFS
> >
> > How to fix:
> > Since the read BG is already released, we must avoid triggering -EROFS.
> > With this commit, we use ocfs2_read_hint_group_descriptor() to detect
> > the released BG block. This approach quietly handles this type of error
> > and returns -EINVAL, which triggers the caller's existing conversion
> > path to -ESTALE.
> >
> > Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> > Reviewed-by: Su Yue <glass.su@suse.com>
> > ---
> > fs/ocfs2/export.c | 6 ++++--
> > fs/ocfs2/suballoc.c | 28 ++++++++++++++++++----------
> > 2 files changed, 22 insertions(+), 12 deletions(-)
> >
> > diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
> > index b95724b767e1..9c2665dd24e2 100644
> > --- a/fs/ocfs2/export.c
> > +++ b/fs/ocfs2/export.c
> > @@ -74,8 +74,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
> > * nice
> > */
> > status = -ESTALE;
> > - } else
> > + } else if (status != -ESTALE) {
> > mlog(ML_ERROR, "test inode bit failed %d\n", status);
> > + }
> > goto unlock_nfs_sync;
> > }
> >
> > @@ -162,8 +163,9 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
> > if (status < 0) {
> > if (status == -EINVAL) {
> > status = -ESTALE;
> > - } else
> > + } else if (status != -ESTALE) {
> > mlog(ML_ERROR, "test inode bit failed %d\n", status);
> > + }
> > parent = ERR_PTR(status);
> > goto bail_unlock;
> > }
> > diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> > index 9a19f5230c8c..ddcfa6e001e8 100644
> > --- a/fs/ocfs2/suballoc.c
> > +++ b/fs/ocfs2/suballoc.c
> > @@ -3152,7 +3152,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> > struct ocfs2_group_desc *group;
> > struct buffer_head *group_bh = NULL;
> > u64 bg_blkno;
> > - int status;
> > + int status, quiet = 0, released;
> >
> > trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
> > (unsigned int)bit);
> > @@ -3168,11 +3168,15 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> >
> > bg_blkno = group_blkno ? group_blkno :
> > ocfs2_which_suballoc_group(blkno, bit);
> > - status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
> > - &group_bh);
> > - if (status < 0) {
> > + status = ocfs2_read_hint_group_descriptor(suballoc, alloc_di, bg_blkno,
> > + &group_bh, &released);
> > + if (released) {
> > + quiet = 1;
> > + status = -ESTALE;
> > + goto bail;
> > + } else if (status < 0) {
> > mlog(ML_ERROR, "read group %llu failed %d\n",
> > - (unsigned long long)bg_blkno, status);
> > + (unsigned long long)bg_blkno, status);
OK
>
> This can be kept untouched.
>
> > goto bail;
> > }
> >
> > @@ -3182,7 +3186,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> > bail:
> > brelse(group_bh);
> >
> > - if (status)
> > + if (status && (!quiet))
>
> '!quiet' is enough, the parentheses is unneeded.
OK. Very interesting, this is not my code style.
Thanks for your careful review.
Heming
>
> Other looks good to me.
>
> Thanks,
> Joseph
>
> > mlog_errno(status);
> > return status;
> > }
> > @@ -3202,7 +3206,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> > */
> > int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
> > {
> > - int status;
> > + int status, quiet = 0;
> > u64 group_blkno = 0;
> > u16 suballoc_bit = 0, suballoc_slot = 0;
> > struct inode *inode_alloc_inode;
> > @@ -3244,8 +3248,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
> >
> > status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
> > group_blkno, blkno, suballoc_bit, res);
> > - if (status < 0)
> > - mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
> > + if (status < 0) {
> > + if (status == -ESTALE)
> > + quiet = 1;
> > + else
> > + mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
> > + }
> >
> > ocfs2_inode_unlock(inode_alloc_inode, 0);
> > inode_unlock(inode_alloc_inode);
> > @@ -3253,7 +3261,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
> > iput(inode_alloc_inode);
> > brelse(alloc_bh);
> > bail:
> > - if (status)
> > + if (status && !quiet)
> > mlog_errno(status);
> > return status;
> > }
>
© 2016 - 2025 Red Hat, Inc.