__blkdev_issue_discard() always returns 0, making all error checking
in XFS discard functions dead code.
Change xfs_discard_extents() return type to void, remove error variable,
error checking, and error logging for the __blkdev_issue_discard() call
in same function.
Update xfs_trim_perag_extents() and xfs_trim_rtgroup_extents() to
ignore the xfs_discard_extents() return value and error checking
code.
Update xfs_discard_rtdev_extents() to ignore __blkdev_issue_discard()
return value and error checking code.
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chaitanya Kulkarni <ckulkarnilinux@gmail.com>
---
fs/xfs/xfs_discard.c | 27 +++++----------------------
fs/xfs/xfs_discard.h | 2 +-
2 files changed, 6 insertions(+), 23 deletions(-)
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 6917de832191..b6ffe4807a11 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -108,7 +108,7 @@ xfs_discard_endio(
* list. We plug and chain the bios so that we only need a single completion
* call to clear all the busy extents once the discards are complete.
*/
-int
+void
xfs_discard_extents(
struct xfs_mount *mp,
struct xfs_busy_extents *extents)
@@ -116,7 +116,6 @@ xfs_discard_extents(
struct xfs_extent_busy *busyp;
struct bio *bio = NULL;
struct blk_plug plug;
- int error = 0;
blk_start_plug(&plug);
list_for_each_entry(busyp, &extents->extent_list, list) {
@@ -126,18 +125,10 @@ xfs_discard_extents(
trace_xfs_discard_extent(xg, busyp->bno, busyp->length);
- error = __blkdev_issue_discard(btp->bt_bdev,
+ __blkdev_issue_discard(btp->bt_bdev,
xfs_gbno_to_daddr(xg, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_KERNEL, &bio);
- if (error && error != -EOPNOTSUPP) {
- xfs_info(mp,
- "discard failed for extent [0x%llx,%u], error %d",
- (unsigned long long)busyp->bno,
- busyp->length,
- error);
- break;
- }
}
if (bio) {
@@ -148,8 +139,6 @@ xfs_discard_extents(
xfs_discard_endio_work(&extents->endio_work);
}
blk_finish_plug(&plug);
-
- return error;
}
/*
@@ -385,9 +374,7 @@ xfs_trim_perag_extents(
* list after this function call, as it may have been freed by
* the time control returns to us.
*/
- error = xfs_discard_extents(pag_mount(pag), extents);
- if (error)
- break;
+ xfs_discard_extents(pag_mount(pag), extents);
if (xfs_trim_should_stop())
break;
@@ -496,12 +483,10 @@ xfs_discard_rtdev_extents(
trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length);
- error = __blkdev_issue_discard(bdev,
+ __blkdev_issue_discard(bdev,
xfs_rtb_to_daddr(mp, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_NOFS, &bio);
- if (error)
- break;
}
xfs_discard_free_rtdev_extents(tr);
@@ -741,9 +726,7 @@ xfs_trim_rtgroup_extents(
* list after this function call, as it may have been freed by
* the time control returns to us.
*/
- error = xfs_discard_extents(rtg_mount(rtg), tr.extents);
- if (error)
- break;
+ xfs_discard_extents(rtg_mount(rtg), tr.extents);
low = tr.restart_rtx;
} while (!xfs_trim_should_stop() && low <= high);
diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h
index 2b1a85223a56..8c5cc4af6a07 100644
--- a/fs/xfs/xfs_discard.h
+++ b/fs/xfs/xfs_discard.h
@@ -6,7 +6,7 @@ struct fstrim_range;
struct xfs_mount;
struct xfs_busy_extents;
-int xfs_discard_extents(struct xfs_mount *mp, struct xfs_busy_extents *busy);
+void xfs_discard_extents(struct xfs_mount *mp, struct xfs_busy_extents *busy);
int xfs_ioc_trim(struct xfs_mount *mp, struct fstrim_range __user *fstrim);
#endif /* XFS_DISCARD_H */
--
2.40.0
On 11/25/25 07:48, Chaitanya Kulkarni wrote:
> __blkdev_issue_discard() always returns 0, making all error checking
> in XFS discard functions dead code.
>
> Change xfs_discard_extents() return type to void, remove error variable,
> error checking, and error logging for the __blkdev_issue_discard() call
> in same function.
>
> Update xfs_trim_perag_extents() and xfs_trim_rtgroup_extents() to
> ignore the xfs_discard_extents() return value and error checking
> code.
>
> Update xfs_discard_rtdev_extents() to ignore __blkdev_issue_discard()
> return value and error checking code.
>
> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Chaitanya Kulkarni <ckulkarnilinux@gmail.com>
> ---
> fs/xfs/xfs_discard.c | 27 +++++----------------------
> fs/xfs/xfs_discard.h | 2 +-
> 2 files changed, 6 insertions(+), 23 deletions(-)
>
> diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
> index 6917de832191..b6ffe4807a11 100644
> --- a/fs/xfs/xfs_discard.c
> +++ b/fs/xfs/xfs_discard.c
> @@ -108,7 +108,7 @@ xfs_discard_endio(
> * list. We plug and chain the bios so that we only need a single completion
> * call to clear all the busy extents once the discards are complete.
> */
> -int
> +void
> xfs_discard_extents(
> struct xfs_mount *mp,
> struct xfs_busy_extents *extents)
> @@ -116,7 +116,6 @@ xfs_discard_extents(
> struct xfs_extent_busy *busyp;
> struct bio *bio = NULL;
> struct blk_plug plug;
> - int error = 0;
>
> blk_start_plug(&plug);
> list_for_each_entry(busyp, &extents->extent_list, list) {
> @@ -126,18 +125,10 @@ xfs_discard_extents(
>
> trace_xfs_discard_extent(xg, busyp->bno, busyp->length);
>
> - error = __blkdev_issue_discard(btp->bt_bdev,
> + __blkdev_issue_discard(btp->bt_bdev,
> xfs_gbno_to_daddr(xg, busyp->bno),
> XFS_FSB_TO_BB(mp, busyp->length),
> GFP_KERNEL, &bio);
If blk_alloc_discard_bio() fails to allocate a bio inside
__blkdev_issue_discard(), this may lead to an invalid loop in
list_for_each_entry{}. Instead of using __blkdev_issue_discard(), how
about allocate and submit the discard bios explicitly in
list_for_each_entry{}?
Yongpeng,
> - if (error && error != -EOPNOTSUPP) {
> - xfs_info(mp,
> - "discard failed for extent [0x%llx,%u], error %d",
> - (unsigned long long)busyp->bno,
> - busyp->length,
> - error);
> - break;
> - }
> }
>
> if (bio) {
> @@ -148,8 +139,6 @@ xfs_discard_extents(
> xfs_discard_endio_work(&extents->endio_work);
> }
> blk_finish_plug(&plug);
> -
> - return error;
> }
>
> /*
> @@ -385,9 +374,7 @@ xfs_trim_perag_extents(
> * list after this function call, as it may have been freed by
> * the time control returns to us.
> */
> - error = xfs_discard_extents(pag_mount(pag), extents);
> - if (error)
> - break;
> + xfs_discard_extents(pag_mount(pag), extents);
>
> if (xfs_trim_should_stop())
> break;
> @@ -496,12 +483,10 @@ xfs_discard_rtdev_extents(
>
> trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length);
>
> - error = __blkdev_issue_discard(bdev,
> + __blkdev_issue_discard(bdev,
> xfs_rtb_to_daddr(mp, busyp->bno),
> XFS_FSB_TO_BB(mp, busyp->length),
> GFP_NOFS, &bio);
> - if (error)
> - break;
> }
> xfs_discard_free_rtdev_extents(tr);
>
> @@ -741,9 +726,7 @@ xfs_trim_rtgroup_extents(
> * list after this function call, as it may have been freed by
> * the time control returns to us.
> */
> - error = xfs_discard_extents(rtg_mount(rtg), tr.extents);
> - if (error)
> - break;
> + xfs_discard_extents(rtg_mount(rtg), tr.extents);
>
> low = tr.restart_rtx;
> } while (!xfs_trim_should_stop() && low <= high);
> diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h
> index 2b1a85223a56..8c5cc4af6a07 100644
> --- a/fs/xfs/xfs_discard.h
> +++ b/fs/xfs/xfs_discard.h
> @@ -6,7 +6,7 @@ struct fstrim_range;
> struct xfs_mount;
> struct xfs_busy_extents;
>
> -int xfs_discard_extents(struct xfs_mount *mp, struct xfs_busy_extents *busy);
> +void xfs_discard_extents(struct xfs_mount *mp, struct xfs_busy_extents *busy);
> int xfs_ioc_trim(struct xfs_mount *mp, struct fstrim_range __user *fstrim);
>
> #endif /* XFS_DISCARD_H */
On Wed, Nov 26, 2025 at 10:37:10AM +0800, Yongpeng Yang wrote: >> xfs_gbno_to_daddr(xg, busyp->bno), >> XFS_FSB_TO_BB(mp, busyp->length), >> GFP_KERNEL, &bio); > > If blk_alloc_discard_bio() fails to allocate a bio inside > __blkdev_issue_discard(), It won't, because mempool allocations will not fail if they can sleep as they can here.
On 11/25/25 18:37, Yongpeng Yang wrote:
>> diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
>> index 6917de832191..b6ffe4807a11 100644
>> --- a/fs/xfs/xfs_discard.c
>> +++ b/fs/xfs/xfs_discard.c
>> @@ -108,7 +108,7 @@ xfs_discard_endio(
>> * list. We plug and chain the bios so that we only need a single
>> completion
>> * call to clear all the busy extents once the discards are complete.
>> */
>> -int
>> +void
>> xfs_discard_extents(
>> struct xfs_mount *mp,
>> struct xfs_busy_extents *extents)
>> @@ -116,7 +116,6 @@ xfs_discard_extents(
>> struct xfs_extent_busy *busyp;
>> struct bio *bio = NULL;
>> struct blk_plug plug;
>> - int error = 0;
>> blk_start_plug(&plug);
>> list_for_each_entry(busyp, &extents->extent_list, list) {
>> @@ -126,18 +125,10 @@ xfs_discard_extents(
>> trace_xfs_discard_extent(xg, busyp->bno, busyp->length);
>> - error = __blkdev_issue_discard(btp->bt_bdev,
>> + __blkdev_issue_discard(btp->bt_bdev,
>> xfs_gbno_to_daddr(xg, busyp->bno),
>> XFS_FSB_TO_BB(mp, busyp->length),
>> GFP_KERNEL, &bio);
>
> If blk_alloc_discard_bio() fails to allocate a bio inside
> __blkdev_issue_discard(), this may lead to an invalid loop in
> list_for_each_entry{}. Instead of using __blkdev_issue_discard(), how
> about allocate and submit the discard bios explicitly in
> list_for_each_entry{}?
>
> Yongpeng,
Calling __blkdev_issue_discard() keeps managing all the bio with the
appropriate GFP mask, so the semantics stay the same. You are just
moving memory allocation to the caller and potentially looking at
implementing retry on bio allocation failure.
The retry for discard bio memory allocation is not desired I think,
since it's only a hint to the controller.
This patch is simply cleaning up dead error-handling branches at the
callers no behavioral changes intended.
What maybe useful is to stop iterating once we fail to allocate the
bio [1].
-ck
[1] Potential addition on the top of this to exit early in discard loop
on bio allocation failure.
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index b6ffe4807a11..1519f708bb79 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -129,6 +129,13 @@ xfs_discard_extents(
xfs_gbno_to_daddr(xg, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_KERNEL, &bio);
+ /*
+ * We failed to allocate bio instead of continuing the loop
+ * so it will lead to inconsistent discards to the disk
+ * exit early and jump into xfs_discard_busy_clear().
+ */
+ if (!bio)
+ break;
}
if (bio) {
If we keep looping after the first bio == NULL, the rest of the range is
guaranteed to be inconsistent anyways, because every subsequent iteration
will fall into one of three cases:
- The allocator keeps returning NULL, so none of the remaining LBAs receive
discard.
- Rest of the allocator succeeds, but we’ve already skipped a chunk, leaving
a hole in the discard range.
- We get intermittent successes, which produces alternating chunks of
discarded and undiscarded blocks.
In each of those scenarios, the disk ends up with a partially discarded
range, so the correct fix is to break out of the loop immediately and
proceed to xfs_discard_busy_clear() once the very first allocation fails.
On Wed, Nov 26, 2025 at 08:07:21AM +0000, Chaitanya Kulkarni wrote: > The retry for discard bio memory allocation is not desired I think, > since it's only a hint to the controller. Yes, it is. The command is defined as a hint, but it's required for a lot of workloads to work. It's not just a speculative readahead.
On 11/26/25 16:07, Chaitanya Kulkarni via Linux-f2fs-devel wrote:
> On 11/25/25 18:37, Yongpeng Yang wrote:
>>> diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
>>> index 6917de832191..b6ffe4807a11 100644
>>> --- a/fs/xfs/xfs_discard.c
>>> +++ b/fs/xfs/xfs_discard.c
>>> @@ -108,7 +108,7 @@ xfs_discard_endio(
>>> * list. We plug and chain the bios so that we only need a single
>>> completion
>>> * call to clear all the busy extents once the discards are complete.
>>> */
>>> -int
>>> +void
>>> xfs_discard_extents(
>>> struct xfs_mount *mp,
>>> struct xfs_busy_extents *extents)
>>> @@ -116,7 +116,6 @@ xfs_discard_extents(
>>> struct xfs_extent_busy *busyp;
>>> struct bio *bio = NULL;
>>> struct blk_plug plug;
>>> - int error = 0;
>>> blk_start_plug(&plug);
>>> list_for_each_entry(busyp, &extents->extent_list, list) {
>>> @@ -126,18 +125,10 @@ xfs_discard_extents(
>>> trace_xfs_discard_extent(xg, busyp->bno, busyp->length);
>>> - error = __blkdev_issue_discard(btp->bt_bdev,
>>> + __blkdev_issue_discard(btp->bt_bdev,
>>> xfs_gbno_to_daddr(xg, busyp->bno),
>>> XFS_FSB_TO_BB(mp, busyp->length),
>>> GFP_KERNEL, &bio);
>>
>> If blk_alloc_discard_bio() fails to allocate a bio inside
>> __blkdev_issue_discard(), this may lead to an invalid loop in
>> list_for_each_entry{}. Instead of using __blkdev_issue_discard(), how
>> about allocate and submit the discard bios explicitly in
>> list_for_each_entry{}?
>>
>> Yongpeng,
>
>
> Calling __blkdev_issue_discard() keeps managing all the bio with the
> appropriate GFP mask, so the semantics stay the same. You are just
> moving memory allocation to the caller and potentially looking at
> implementing retry on bio allocation failure.
>
> The retry for discard bio memory allocation is not desired I think,
> since it's only a hint to the controller.
Agreed. I'm not trying to retry bio allocation inside the
list_for_each_entry{} loop. Instead, since blk_alloc_discard_bio()
returning NULL cannot reliably indicate whether the failure is due to
bio allocation failure, it could also be caused by 'bio_sects == 0', I'd
like to allocate the bio explicitly.
>
> This patch is simply cleaning up dead error-handling branches at the
> callers no behavioral changes intended.
>
> What maybe useful is to stop iterating once we fail to allocate the
> bio [1].
>
> -ck
>
> [1] Potential addition on the top of this to exit early in discard loop
> on bio allocation failure.
>
> diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
> index b6ffe4807a11..1519f708bb79 100644
> --- a/fs/xfs/xfs_discard.c
> +++ b/fs/xfs/xfs_discard.c
> @@ -129,6 +129,13 @@ xfs_discard_extents(
> xfs_gbno_to_daddr(xg, busyp->bno),
> XFS_FSB_TO_BB(mp, busyp->length),
> GFP_KERNEL, &bio);
> + /*
> + * We failed to allocate bio instead of continuing the loop
> + * so it will lead to inconsistent discards to the disk
> + * exit early and jump into xfs_discard_busy_clear().
> + */
> + if (!bio)
> + break;
I noticed that as long as XFS_FSB_TO_BB(mp, busyp->length) is greater
than 0 and there is no bio allocation failure, __blkdev_issue_discard()
will never return NULL. I'm not familiar with this part of the xfs, so
I'm not sure whether there are cases where 'XFS_FSB_TO_BB(mp,
busyp->length)' could be 0. If such cases do not exist, then
checking whether the bio is NULL should be sufficient.
Yongpeng,
> }
>
> if (bio) {
> > If we keep looping after the first bio == NULL, the rest of the range is
> guaranteed to be inconsistent anyways, because every subsequent iteration
> will fall into one of three cases:
>
> - The allocator keeps returning NULL, so none of the remaining LBAs receive
> discard.
> - Rest of the allocator succeeds, but we’ve already skipped a chunk, leaving
> a hole in the discard range.
> - We get intermittent successes, which produces alternating chunks of
> discarded and undiscarded blocks.
>
> In each of those scenarios, the disk ends up with a partially discarded
> range, so the correct fix is to break out of the loop immediately and
> proceed to xfs_discard_busy_clear() once the very first allocation fails.
On 11/26/25 17:14, Yongpeng Yang wrote:
> On 11/26/25 16:07, Chaitanya Kulkarni via Linux-f2fs-devel wrote:
>> On 11/25/25 18:37, Yongpeng Yang wrote:
>>>> diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
>>>> index 6917de832191..b6ffe4807a11 100644
>>>> --- a/fs/xfs/xfs_discard.c
>>>> +++ b/fs/xfs/xfs_discard.c
>>>> @@ -108,7 +108,7 @@ xfs_discard_endio(
>>>> * list. We plug and chain the bios so that we only need a single
>>>> completion
>>>> * call to clear all the busy extents once the discards are
>>>> complete.
>>>> */
>>>> -int
>>>> +void
>>>> xfs_discard_extents(
>>>> struct xfs_mount *mp,
>>>> struct xfs_busy_extents *extents)
>>>> @@ -116,7 +116,6 @@ xfs_discard_extents(
>>>> struct xfs_extent_busy *busyp;
>>>> struct bio *bio = NULL;
>>>> struct blk_plug plug;
>>>> - int error = 0;
>>>> blk_start_plug(&plug);
>>>> list_for_each_entry(busyp, &extents->extent_list, list) {
>>>> @@ -126,18 +125,10 @@ xfs_discard_extents(
>>>> trace_xfs_discard_extent(xg, busyp->bno, busyp->length);
>>>> - error = __blkdev_issue_discard(btp->bt_bdev,
>>>> + __blkdev_issue_discard(btp->bt_bdev,
>>>> xfs_gbno_to_daddr(xg, busyp->bno),
>>>> XFS_FSB_TO_BB(mp, busyp->length),
>>>> GFP_KERNEL, &bio);
>>>
>>> If blk_alloc_discard_bio() fails to allocate a bio inside
>>> __blkdev_issue_discard(), this may lead to an invalid loop in
>>> list_for_each_entry{}. Instead of using __blkdev_issue_discard(), how
>>> about allocate and submit the discard bios explicitly in
>>> list_for_each_entry{}?
>>>
>>> Yongpeng,
>>
>>
>> Calling __blkdev_issue_discard() keeps managing all the bio with the
>> appropriate GFP mask, so the semantics stay the same. You are just
>> moving memory allocation to the caller and potentially looking at
>> implementing retry on bio allocation failure.
>>
>> The retry for discard bio memory allocation is not desired I think,
>> since it's only a hint to the controller.
>
> Agreed. I'm not trying to retry bio allocation inside the
> list_for_each_entry{} loop. Instead, since blk_alloc_discard_bio()
> returning NULL cannot reliably indicate whether the failure is due to
> bio allocation failure, it could also be caused by 'bio_sects == 0', I'd
> like to allocate the bio explicitly.
>
>>
>> This patch is simply cleaning up dead error-handling branches at the
>> callers no behavioral changes intended.
>>
>> What maybe useful is to stop iterating once we fail to allocate the
>> bio [1].
>>
>> -ck
>>
>> [1] Potential addition on the top of this to exit early in discard loop
>> on bio allocation failure.
>>
>> diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
>> index b6ffe4807a11..1519f708bb79 100644
>> --- a/fs/xfs/xfs_discard.c
>> +++ b/fs/xfs/xfs_discard.c
>> @@ -129,6 +129,13 @@ xfs_discard_extents(
>> xfs_gbno_to_daddr(xg, busyp->bno),
>> XFS_FSB_TO_BB(mp, busyp->length),
>> GFP_KERNEL, &bio);
>> + /*
>> + * We failed to allocate bio instead of continuing the
>> loop
>> + * so it will lead to inconsistent discards to the disk
>> + * exit early and jump into xfs_discard_busy_clear().
>> + */
>> + if (!bio)
>> + break;
>
> I noticed that as long as XFS_FSB_TO_BB(mp, busyp->length) is greater
> than 0 and there is no bio allocation failure, __blkdev_issue_discard()
> will never return NULL. I'm not familiar with this part of the xfs, so
> I'm not sure whether there are cases where 'XFS_FSB_TO_BB(mp,
> busyp->length)' could be 0. If such cases do not exist, then
> checking whether the bio is NULL should be sufficient.
>
> Yongpeng,
If __blkdev_issue_discard() requires multiple calls to
blk_alloc_discard_bio(), once the first bio allocation succeeds, it will
never result in bio == NULL, meaning that any subsequent bio allocation
failures cannot be detected.
Yongpeng,
>
>> }
>> if (bio) {
>> > If we keep looping after the first bio == NULL, the rest of the
>> range is
>> guaranteed to be inconsistent anyways, because every subsequent iteration
>> will fall into one of three cases:
>>
>> - The allocator keeps returning NULL, so none of the remaining LBAs
>> receive
>> discard.
>> - Rest of the allocator succeeds, but we’ve already skipped a chunk,
>> leaving
>> a hole in the discard range.
>> - We get intermittent successes, which produces alternating chunks of
>> discarded and undiscarded blocks.
>>
>> In each of those scenarios, the disk ends up with a partially discarded
>> range, so the correct fix is to break out of the loop immediately and
>> proceed to xfs_discard_busy_clear() once the very first allocation fails.
© 2016 - 2025 Red Hat, Inc.