Fwd: [PATCH v2] bcache: use bio cloning for detached device requests
Posted by Stephen Zhang 2 weeks, 6 days ago
---------- Forwarded message ---------
From: zhangshida <starzhangzsd@gmail.com>
Date: Tue, Jan 20, 2026 at 10:35
Subject: [PATCH v2] bcache: use bio cloning for detached device requests
To: <colyli@fnnas.com>, <kent.overstreet@linux.dev>,
<axboe@kernel.dk>, <sashal@kernel.org>, <hch@infradead.org>
Cc: <linux-bcache@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
<zhangshida@kylinos.cn>, <starzhangzsd@gmail.com>, Christoph Hellwig
<hch@lst.de>


From: Shida Zhang <zhangshida@kylinos.cn>

Previously, bcache hijacked the bi_end_io and bi_private fields of
the incoming bio when the backing device was in a detached state.
This is fragile and breaks if the bio needs to be processed by
other layers.

Switch to a cloned bio embedded in a private structure, which ensures
the original bio's metadata remains untouched.

Fixes: 53280e398471 ("bcache: fix improper use of bi_end_io")
Co-developed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Shida Zhang <zhangshida@kylinos.cn>
---

Changelog:
v1:
https://lore.kernel.org/all/20260115074811.230807-1-zhangshida@kylinos.cn/
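
For reference, a minimal sketch of the completion pattern the patch adopts
(hypothetical names, not the patch code itself): per-I/O state lives in the
bioset front pad directly in front of the embedded clone, and the end_io
handler recovers it with container_of() instead of hijacking the original
bio's bi_end_io/bi_private:

struct clone_io {			/* hypothetical example */
	struct bio	*orig_bio;	/* the untouched upper-layer bio */
	struct bio	bio;		/* must be last: the clone itself */
};

static void clone_end_io(struct bio *bio)
{
	struct clone_io *io = container_of(bio, struct clone_io, bio);
	struct bio *orig = io->orig_bio;

	orig->bi_status = bio->bi_status;
	bio_put(bio);		/* returns the clone (and io) to its bioset */
	bio_endio(orig);	/* complete the original, fields intact */
}

/*
 * The bioset reserves room for the container via its front pad:
 *	bioset_init(&bs, 4, offsetof(struct clone_io, bio), flags);
 * and the submit side recovers the container the same way:
 *	clone = bio_alloc_clone(bdev, orig, GFP_NOIO, &bs);
 *	io = container_of(clone, struct clone_io, bio);
 */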

 drivers/md/bcache/bcache.h  |  9 +++++
 drivers/md/bcache/request.c | 79 ++++++++++++++++---------------------
 drivers/md/bcache/super.c   | 12 +++++-
 3 files changed, 54 insertions(+), 46 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 8ccacba8554..54ff4e0238a 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -273,6 +273,8 @@ struct bcache_device {

        struct bio_set          bio_split;

+       struct bio_set          bio_detach;
+
        unsigned int            data_csum:1;

        int (*cache_miss)(struct btree *b, struct search *s,
@@ -753,6 +755,13 @@ struct bbio {
        struct bio              bio;
 };

+struct detached_dev_io_private {
+       struct bcache_device    *d;
+       unsigned long           start_time;
+       struct bio              *orig_bio;
+       struct bio              bio;
+};
+
 #define BTREE_PRIO             USHRT_MAX
 #define INITIAL_PRIO           32768U

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 82fdea7dea7..e0b12cb622b 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1077,68 +1077,58 @@ static CLOSURE_CALLBACK(cached_dev_nodata)
        continue_at(cl, cached_dev_bio_complete, NULL);
 }

-struct detached_dev_io_private {
-       struct bcache_device    *d;
-       unsigned long           start_time;
-       bio_end_io_t            *bi_end_io;
-       void                    *bi_private;
-       struct block_device     *orig_bdev;
-};
-
 static void detached_dev_end_io(struct bio *bio)
 {
-       struct detached_dev_io_private *ddip;
-
-       ddip = bio->bi_private;
-       bio->bi_end_io = ddip->bi_end_io;
-       bio->bi_private = ddip->bi_private;
+       struct detached_dev_io_private *ddip =
+               container_of(bio, struct detached_dev_io_private, bio);
+       struct bio *orig_bio = ddip->orig_bio;

        /* Count on the bcache device */
-       bio_end_io_acct_remapped(bio, ddip->start_time, ddip->orig_bdev);
+       bio_end_io_acct(orig_bio, ddip->start_time);

        if (bio->bi_status) {
-               struct cached_dev *dc = container_of(ddip->d,
-                                                    struct cached_dev, disk);
+               struct cached_dev *dc = bio->bi_private;
+
                /* should count I/O error for backing device here */
                bch_count_backing_io_errors(dc, bio);
+               orig_bio->bi_status = bio->bi_status;
        }

-       kfree(ddip);
-       bio_endio(bio);
+       bio_put(bio);
+       bio_endio(orig_bio);
 }

-static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
-               struct block_device *orig_bdev, unsigned long start_time)
+static void detached_dev_do_request(struct bcache_device *d,
+               struct bio *orig_bio, unsigned long start_time)
 {
        struct detached_dev_io_private *ddip;
        struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+       struct bio *clone_bio;

-       /*
-        * no need to call closure_get(&dc->disk.cl),
-        * because upper layer had already opened bcache device,
-        * which would call closure_get(&dc->disk.cl)
-        */
-       ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
-       if (!ddip) {
-               bio->bi_status = BLK_STS_RESOURCE;
-               bio_endio(bio);
+       if (bio_op(orig_bio) == REQ_OP_DISCARD &&
+           !bdev_max_discard_sectors(dc->bdev)) {
+               bio_endio(orig_bio);
                return;
        }

-       ddip->d = d;
+       clone_bio = bio_alloc_clone(dc->bdev, orig_bio, GFP_NOIO,
+                                   &d->bio_detach);
+       if (!clone_bio) {
+               orig_bio->bi_status = BLK_STS_RESOURCE;
+               bio_endio(orig_bio);
+               return;
+       }
+
+       ddip = container_of(clone_bio, struct detached_dev_io_private, bio);
        /* Count on the bcache device */
-       ddip->orig_bdev = orig_bdev;
+       ddip->d = d;
        ddip->start_time = start_time;
-       ddip->bi_end_io = bio->bi_end_io;
-       ddip->bi_private = bio->bi_private;
-       bio->bi_end_io = detached_dev_end_io;
-       bio->bi_private = ddip;
-
-       if ((bio_op(bio) == REQ_OP_DISCARD) &&
-           !bdev_max_discard_sectors(dc->bdev))
-               detached_dev_end_io(bio);
-       else
-               submit_bio_noacct(bio);
+       ddip->orig_bio = orig_bio;
+
+       clone_bio->bi_end_io = detached_dev_end_io;
+       clone_bio->bi_private = dc;
+
+       submit_bio_noacct(clone_bio);
 }

 static void quit_max_writeback_rate(struct cache_set *c,
@@ -1214,10 +1204,10 @@ void cached_dev_submit_bio(struct bio *bio)

        start_time = bio_start_io_acct(bio);

-       bio_set_dev(bio, dc->bdev);
        bio->bi_iter.bi_sector += dc->sb.data_offset;

        if (cached_dev_get(dc)) {
+               bio_set_dev(bio, dc->bdev);
                s = search_alloc(bio, d, orig_bdev, start_time);
                trace_bcache_request_start(s->d, bio);

@@ -1237,9 +1227,10 @@ void cached_dev_submit_bio(struct bio *bio)
                        else
                                cached_dev_read(dc, s);
                }
-       } else
+       } else {
                /* I/O request sent to backing device */
-               detached_dev_do_request(d, bio, orig_bdev, start_time);
+               detached_dev_do_request(d, bio, start_time);
+       }
 }

 static int cached_dev_ioctl(struct bcache_device *d, blk_mode_t mode,
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index c17d4517af2..d4b798668c8 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -887,6 +887,7 @@ static void bcache_device_free(struct bcache_device *d)
        }

        bioset_exit(&d->bio_split);
+       bioset_exit(&d->bio_detach);
        kvfree(d->full_dirty_stripes);
        kvfree(d->stripe_sectors_dirty);

@@ -949,6 +950,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
                        BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
                goto out_ida_remove;

+       if (bioset_init(&d->bio_detach, 4,
+                       offsetof(struct detached_dev_io_private, bio),
+                       BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
+               goto out_bioset_split_exit;
+
        if (lim.logical_block_size > PAGE_SIZE && cached_bdev) {
                /*
                 * This should only happen with BCACHE_SB_VERSION_BDEV.
@@ -964,7 +970,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,

        d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
        if (IS_ERR(d->disk))
-               goto out_bioset_exit;
+               goto out_bioset_detach_exit;

        set_capacity(d->disk, sectors);
        snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
@@ -976,7 +982,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
        d->disk->private_data   = d;
        return 0;

-out_bioset_exit:
+out_bioset_detach_exit:
+       bioset_exit(&d->bio_detach);
+out_bioset_split_exit:
        bioset_exit(&d->bio_split);
 out_ida_remove:
        ida_free(&bcache_device_idx, idx);
--
2.34.1
Re: Fwd: [PATCH v2] bcache: use bio cloning for detached device requests
Posted by Coly Li 2 weeks, 6 days ago
On Tue, Jan 20, 2026 at 10:39:36AM +0800, Stephen Zhang wrote:
> [...]
> 
> @@ -273,6 +273,8 @@ struct bcache_device {
> 
>         struct bio_set          bio_split;
> 
> +       struct bio_set          bio_detach;
                                  ^^^^^^^^^-> better to rename it to bio_detached

> [...]
> 
> @@ -949,6 +950,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
>                         BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
>                 goto out_ida_remove;
> 
> +       if (bioset_init(&d->bio_detach, 4,
					^^^^^-> I feel 4 might be a bit small
here. The bio_detach set is used for normal I/O when the backing device is not
attached to a cache device. I would suggest setting the pool size to 128 or 256.

> +                       offsetof(struct detached_dev_io_private, bio),
> +                       BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
> +               goto out_bioset_split_exit;
> 
> [...]

The rest looks good to me. Thanks for patching it up.

Coly Li
Re: Fwd: [PATCH v2] bcache: use bio cloning for detached device requests
Posted by Jens Axboe 2 weeks, 6 days ago
On 1/20/26 7:46 AM, Coly Li wrote:
>> @@ -949,6 +950,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
>>                         BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
>>                 goto out_ida_remove;
>>
>> +       if (bioset_init(&d->bio_detach, 4,
> 					^^^^^-> I feel 4 might be a bit small
> here. The bio_detach set is used for normal I/O when the backing device is
> not attached to a cache device. I would suggest setting the pool size to
> 128 or 256.

Absolutely not, 4 is more than plenty. The pool elements are only ever
used if allocations fail, to guarantee forward progress. Setting aside
128 or 256 for that case is utterly wasteful, you only need a couple. 4
is a good number, if anything it should be smaller (2).
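
An illustrative sketch of the mempool semantics described above (generic
mempool code, not bcache's):

#include <linux/mempool.h>

/*
 * mempool_alloc() tries the regular allocator first and only consumes a
 * reserved element when that fails; with a blocking mask like GFP_NOIO it
 * then sleeps until an element is freed back, so it does not fail. The
 * reserve therefore only has to be big enough to guarantee forward
 * progress, not to cover peak IOPS.
 */
static void *alloc_with_forward_progress(mempool_t *pool)
{
	return mempool_alloc(pool, GFP_NOIO);	/* may sleep, won't return NULL */
}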

-- 
Jens Axboe
Re: Fwd: [PATCH v2] bcache: use bio cloning for detached device requests
Posted by Coly Li 2 weeks, 5 days ago
On Tue, Jan 20, 2026 at 08:01:52AM +0800, Jens Axboe wrote:
> On 1/20/26 7:46 AM, Coly Li wrote:
> >> @@ -949,6 +950,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
> >>                         BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
> >>                 goto out_ida_remove;
> >>
> >> +       if (bioset_init(&d->bio_detach, 4,
> > 					^^^^^-> I feel 4 might be a bit small
> > here. The bio_detach set is used for normal I/O when the backing device is
> > not attached to a cache device. I would suggest setting the pool size to
> > 128 or 256.
> 
> Absolutely not, 4 is more than plenty. The pool elements are only ever
> used if allocations fail, to guarantee forward progress. Setting aside
> 128 or 256 for that case is utterly wasteful, you only need a couple. 4
> is a good number, if anything it should be smaller (2).

Hi Jens,

Thanks for the information. Please correct me if I am wrong about the
following:
- If the backing device is a normal SSD RAID0, the IOPS without an attached
cache device can be in the thousands. In this case, I assume 128 or 256 would
be more tolerant.
- I see what ‘4’ means, I am just not sure/comfortable when memory pressure is
high. Reserving 128/256 would occupy around 0.5~1MB of memory, and I feel such
extra memory is acceptable in the bcache use case.
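
(Rough arithmetic behind the 0.5~1MB estimate, assuming BIOSET_NEED_BVECS:
each reserved element also pins a BIO_MAX_VECS bio_vec array, about
256 * 16 bytes = 4KB, so 128~256 reserved elements come to roughly 0.5~1MB
for the bvec pool alone, plus a much smaller amount for the bios themselves.)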

Don't get me wrong, I totally trust you. If '4' works well enough under high
memory pressure for a detached bcache device, it is cool.

Thanks in advance.

Coly Li
Re: Fwd: [PATCH v2] bcache: use bio cloning for detached device requests
Posted by Kent Overstreet 2 weeks, 4 days ago
On Wed, Jan 21, 2026 at 09:34:01AM +0800, Coly Li wrote:
> [...]
> 
> Don't get me wrong, I totally trust you. If '4' works well enough under high
> memory pressure for a detached bcache device, it is cool.

I'd actually be in favor of raising it. mm isn't getting any better at
reclaim latency under load - I've been seeing regressions in that area -
and considering that mm doesn't have its own reserves for allocations
critical for reclaim, code in reclaim paths probably should.

"How often it's used" on a well behaving system in a benchmark scenario
is not relevant, the relevant thing to look at would be allocation
latency in OOM scenarios.

The time_stats that bcache/bcachefs have are quite useful here. We recently
saw btree node allocation buffers taking > 10 seconds to allocate, so
this is a real issue.

Memory for bios is a drop in the bucket compared to the btree node
cache, so the amount of memory is not something to overly sweat here.
Re: Fwd: [PATCH v2] bcache: use bio cloning for detached device requests
Posted by Stephen Zhang 2 weeks, 5 days ago
On Wed, Jan 21, 2026 at 09:34, Coly Li <colyli@fnnas.com> wrote:
> [...]

Hi Jens, Coly,

Regarding the discussion on the bio_detached pool size: while 4 is
sufficient to guarantee forward progress, some high-load environments
may prefer a larger reserve to minimize allocation latency under
extreme memory pressure.

To provide a middle ground, I propose adding a generic bioset_resize()
interface to the block layer and exposing it through a new bcache
sysfs attribute 'detached_pool_size'.

This allows us to keep the default value conservative (e.g., 4) to
avoid unnecessary memory overhead, while giving users the flexibility
to tune the emergency reserves based on their specific hardware and
workload requirements.

The patch is attached below. Does this look like a reasonable
compromise?

Thanks,
Shida
------------
From 511d7404ce482d4ae8faf5911cf3e11bb791369e Mon Sep 17 00:00:00 2001
From: Shida Zhang <zhangshida@kylinos.cn>
Date: Wed, 21 Jan 2026 19:32:06 +0800
Subject: [PATCH] block/bcache: introduce bioset_resize() and use it in bcache

Subsystems using bio_sets currently have their reserve pool sizes fixed at
initialization. In some scenarios, such as high-load storage environments or
memory-constrained systems, it is beneficial to adjust these emergency
reserves dynamically without tearing down the entire bioset.

Introduce bioset_resize() to allow resizing the bio_pool and bvec_pool
within an existing bio_set.

Additionally, update bcache to expose this functionality. A new sysfs
attribute 'detached_pool_size' is added to bcache devices, allowing users
to tune the emergency reserve for detached bios based on production
workloads.

Signed-off-by: Shida Zhang <zhangshida@kylinos.cn>
---
 block/bio.c               | 30 ++++++++++++++++++++++++++++++
 drivers/md/bcache/sysfs.c | 34 ++++++++++++++++++++++++++++++++++
 include/linux/bio.h       |  1 +
 3 files changed, 65 insertions(+)

diff --git a/block/bio.c b/block/bio.c
index e726c0e280a..8c80325e616 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1857,6 +1857,36 @@ int bioset_init(struct bio_set *bs,
 }
 EXPORT_SYMBOL(bioset_init);

+/**
+ * bioset_resize - resize a bio_set's reserve pools
+ * @bs: bio_set to resize
+ * @pool_size: new number of elements in the reserve pool
+ *
+ * Description:
+ *    Resizes the pre-allocated elements in a bio_set. This allows a subsystem
+ *    to increase or decrease its emergency reserves dynamically.
+ *
+ * Return:
+ *    0 on success, -ENOMEM on failure.
+ */
+int bioset_resize(struct bio_set *bs, unsigned int pool_size)
+{
+	int ret;
+
+	ret = mempool_resize(&bs->bio_pool, pool_size);
+	if (ret)
+		return ret;
+
+	if (bs->bvec_pool.pool_data) {
+		ret = mempool_resize(&bs->bvec_pool, pool_size);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(bioset_resize);
+
 static int __init init_bio(void)
 {
 	int i;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 72f38e5b6f5..de63c04dafe 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -16,6 +16,7 @@
 #include <linux/blkdev.h>
 #include <linux/sort.h>
 #include <linux/sched/clock.h>
+#include <linux/bio.h>

 extern bool bcache_is_reboot;

@@ -150,6 +151,8 @@ rw_attribute(copy_gc_enabled);
 rw_attribute(idle_max_writeback_rate);
 rw_attribute(gc_after_writeback);
 rw_attribute(size);
+rw_attribute(detached_pool_size);
+

 static ssize_t bch_snprint_string_list(char *buf,
         size_t size,
@@ -167,6 +170,30 @@ static ssize_t bch_snprint_string_list(char *buf,
 	return out - buf;
 }

+SHOW(bch_detached_pool_size)
+{
+	struct bcache_device *d = container_of(kobj, struct bcache_device, kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", d->bio_detached.bio_pool.min_nr);
+}
+
+STORE(bch_detached_pool_size)
+{
+	struct bcache_device *d = container_of(kobj, struct bcache_device, kobj);
+	unsigned int v;
+	int ret;
+
+	/* Bcache STORE macro uses 'size', not 'count' */
+	if (kstrtouint(buf, 10, &v) || v < 1)
+		return -EINVAL;
+
+	ret = bioset_resize(&d->bio_detached, v);
+	if (ret)
+		return ret;
+
+	return size;
+}
+
 SHOW(__bch_cached_dev)
 {
 	struct cached_dev *dc = container_of(kobj, struct cached_dev,
@@ -282,6 +309,9 @@ SHOW(__bch_cached_dev)
 		return strlen(buf);
 	}

+	if (attr == &sysfs_detached_pool_size)
+		return bch_detached_pool_size_show(kobj, attr, buf);
+
 #undef var
 	return 0;
 }
@@ -450,6 +480,9 @@ STORE(__cached_dev)
 	if (attr == &sysfs_stop)
 		bcache_device_stop(&dc->disk);

+	if (attr == &sysfs_detached_pool_size)
+		return bch_detached_pool_size_store(kobj, attr, buf, size);
+
 	return size;
 }

@@ -540,6 +573,7 @@ static struct attribute *bch_cached_dev_attrs[] = {
 #endif
 	&sysfs_backing_dev_name,
 	&sysfs_backing_dev_uuid,
+	&sysfs_detached_pool_size,
 	NULL
 };
 ATTRIBUTE_GROUPS(bch_cached_dev);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index c75a9b3672a..2d8a657000c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -354,6 +354,7 @@ enum {
 extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags);
 extern void bioset_exit(struct bio_set *);
 extern int biovec_init_pool(mempool_t *pool, int pool_entries);
+int bioset_resize(struct bio_set *bs, unsigned int pool_size);

 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
       blk_opf_t opf, gfp_t gfp_mask,
-- 
2.34.1
Re: Fwd: [PATCH v2] bcache: use bio cloning for detached device requests
Posted by Jens Axboe 2 weeks, 5 days ago
On 1/21/26 4:55 AM, Stephen Zhang wrote:
> [...]
> 
> Hi Jens, Coly,
> 
> Regarding the discussion on the bio_detached pool size: while 4 is
> sufficient to guarantee forward progress, some high-load environments
> may prefer a larger reserve to minimize allocation latency under
> extreme memory pressure.
> 
> To provide a middle ground, I propose adding a generic bioset_resize()
> interface to the block layer and exposing it through a new bcache
> sysfs attribute 'detached_pool_size'.
> 
> This allows us to keep the default value conservative (e.g., 4) to
> avoid unnecessary memory overhead, while giving users the flexibility
> to tune the emergency reserves based on their specific hardware and
> workload requirements.
> 
> The patch is attached below. Does this look like a reasonable
> compromise?

Guys, just stop. 4 is fine. Take a look at the default biosets and
what reserve amount they use. I'd recommend you do some testing and
check how often the reserves are _actually_ used, and once you do
that, then you'll see why this isn't necessary at all.

I thought the idea was to make some progress on getting this fixed.
Let's please do that and get it queued.

-- 
Jens Axboe

Re: [PATCH v2] bcache: use bio cloning for detached device requests
Posted by Coly Li 2 weeks, 4 days ago
> On Jan 21, 2026, at 22:50, Jens Axboe <axboe@kernel.dk> wrote:
> 
> [...]
> 
> 
> Guys, just stop. 4 is fine. Take a look at the default biosets and
> what reserve amount they use. I'd recommend you do some testing and
> check how often the reserves are _actually_ used, and once you do
> that, then you'll see why this isn't necessary at all.
> 
> I thought the idea was to make some progress on getting this fixed.
> Let's please do that and get it queued.

Yes, let’s get the fix in first.

Thanks.


Coly Li
Re: Fwd: [PATCH v2] bcache: use bio cloning for detached device requests
Posted by Christoph Hellwig 2 weeks, 6 days ago
This looks good to me, thanks!
Re: [PATCH v2] bcache: use bio cloning for detached device requests
Posted by Stephen Zhang 2 weeks, 6 days ago
On Tue, Jan 20, 2026 at 10:39, Stephen Zhang <starzhangzsd@gmail.com> wrote:
> [...]

I’ve tested this patch with the script below, and the results look good.
I couldn't find a standard test suite for the project, but I’d be happy to
integrate these tests into it if needed. Just let me know.

Thanks,
Shida
-----
#!/bin/bash
# cycle_test.sh - Automation for bcache detached bio-cloning patch

# --- CONFIGURATION ---
BACKING_DEV="/dev/sdb1"
CACHE_DEV="/dev/nvme0n1p1"
BCACHE_DEV="/dev/bcache0"
ITERATIONS=3
FIO_RUNTIME=60

set -e

log() { echo -e "\n[$(date +%T)] $1"; }

# --- CLEANUP HANDLER ---
cleanup() {
    set +e
    log "CLEANING UP RESOURCES..."

    if pgrep fio > /dev/null; then
        sudo pkill -9 fio
    fi

    # 1. Stop the logical bcache device
    if [ -b "$BCACHE_DEV" ]; then
        BDEV_NAME=$(basename "$BCACHE_DEV")
        echo 1 | sudo tee /sys/block/$BDEV_NAME/bcache/stop > /dev/null 2>&1 || true
    fi

    # 2. Unregister the backing device specifically if it's still active
    # This is often the cause of "Device busy" during wipefs
    BACKING_NAME=$(basename "$BACKING_DEV")
    if [ -d "/sys/block/$BACKING_NAME/bcache" ]; then
        echo 1 | sudo tee /sys/block/$BACKING_NAME/bcache/stop > /dev/null 2>&1 || true
    fi

    # 3. Unregister all bcache cache sets
    for cset in /sys/fs/bcache/*-*-*-*-*; do
        if [ -d "$cset" ]; then
            echo 1 | sudo tee "$cset/unregister" > /dev/null 2>&1 || true
        fi
    done

    # 4. Wait for kernel/udev to catch up
    sudo udevadm settle
    sleep 2
    log "Cleanup complete."
}

# Trap for unexpected exits
trap cleanup EXIT SIGINT SIGTERM

check_deps() {
    for cmd in make-bcache fio iostat wipefs bc pgrep udevadm; do
        if ! command -v $cmd &> /dev/null; then
            echo "Error: $cmd not found."
            exit 1
        fi
    done
}

run_cycle() {
    local i=$1
    log ">>> STARTING CYCLE #$i"

    # 1. Clean and Initialize
    log "Wiping devices..."
    # If wipefs fails, we try a quick dd to clear the headers
    sudo wipefs -a $BACKING_DEV || (sudo dd if=/dev/zero of=$BACKING_DEV bs=1M count=10 && sudo wipefs -a $BACKING_DEV)
    sudo wipefs -a $CACHE_DEV

    log "Creating bcache..."
    sudo make-bcache -B $BACKING_DEV -C $CACHE_DEV
    sleep 3

    if [ ! -b "$BCACHE_DEV" ]; then
        echo "Error: $BCACHE_DEV did not initialize."
        exit 1
    fi

    # 2. Detach
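    # With the cache detached, all I/O to $BCACHE_DEV takes the detached
    # path (detached_dev_do_request), which is exactly the code path the
    # patch converts to bio cloning.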
    log "Detaching backing device..."
    BDEV_NAME=$(basename $BCACHE_DEV)
    echo 1 | sudo tee /sys/block/$BDEV_NAME/bcache/detach > /dev/null

    STATE=$(cat /sys/block/$BDEV_NAME/bcache/state)
    log "Device state: $STATE"

    # 3. Stress Test
    log "Running fio stress test (${FIO_RUNTIME}s)..."
    sudo fio --name=bcache_test --filename=$BCACHE_DEV --rw=randrw --bs=4k \
         --direct=1 --ioengine=libaio --iodepth=64 --runtime=$FIO_RUNTIME \
         --numjobs=4 --group_reporting --minimal > /dev/null

    # 4. Validation
    log "Validating I/O accounting..."
    sleep 5
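
    # After fio exits, a healthy bcache device should be idle. If completion
    # accounting leaked (as with the old bi_end_io hijacking), the in-flight
    # counter never drains and %util stays pegged even with no I/O running.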

    STATS_LINE=$(iostat -x 1 2 $BDEV_NAME | grep -w "$BDEV_NAME" | tail -n 1)
    UTIL=$(echo "$STATS_LINE" | awk '{print $NF}')
    log "Final Stats -> %util: $UTIL"

    if (( $(echo "$UTIL > 1.0" | bc -l) )); then
        echo "!!! FAILURE: Accounting leak detected!"
        exit 1
    fi

    # 5. Cycle Teardown
    log "Cycle $i complete. Teardown..."
    # Call our cleanup logic manually to ensure a clean slate for the next iteration
    cleanup

    # Re-enable 'exit on error' for the next loop
    set -e
    log ">>> CYCLE #$i FINISHED"
}

# --- Main ---
check_deps
sudo -v

for ((c=1; c<=ITERATIONS; c++)); do
    run_cycle $c
    # Extra breather between cycles
    sleep 2
done

log "ALL CYCLES PASSED."
trap - EXIT
----