[PATCH] dm: optimize flushes

Mikulas Patocka posted 1 patch 1 year, 8 months ago
There is a newer version of this series
drivers/md/dm-core.h          |    4 ++-
drivers/md/dm-linear.c        |    1
drivers/md/dm-stripe.c        |    1
drivers/md/dm-table.c         |    4 +++
drivers/md/dm.c               |   47 +++++++++++++++++++++++++++++-------------
include/linux/device-mapper.h |    5 ++++
6 files changed, 47 insertions(+), 15 deletions(-)
[PATCH] dm: optimize flushes
Posted by Mikulas Patocka 1 year, 8 months ago
Device mapper sends flush bios to all the targets and the targets send them
to the underlying devices. That may be inefficient, for example if a table
contains 10 linear targets pointing to the same physical device, then
device mapper would send 10 flush bios to that device - despite the fact
that only one bio would be sufficient.

This commit optimizes the flush behavior. It introduces a per-target
variable flush_pass_around - it is set when the target supports flush
optimization - currently, the dm-linear and dm-stripe targets support it.
When all the targets in a table have flush_pass_around, flush_pass_around
on the table is set. __send_empty_flush tests if the table has
flush_pass_around - and if it has, no flush bios are sent to the targets
and the list dm_table->devices is iterated and the flush bios are sent to
each member of the list.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reported-by: Yang Yang <yang.yang@vivo.com>

---
 drivers/md/dm-core.h          |    4 ++-
 drivers/md/dm-linear.c        |    1 
 drivers/md/dm-stripe.c        |    1 
 drivers/md/dm-table.c         |    4 +++
 drivers/md/dm.c               |   47 +++++++++++++++++++++++++++++-------------
 include/linux/device-mapper.h |    5 ++++
 6 files changed, 47 insertions(+), 15 deletions(-)

Index: linux-2.6/drivers/md/dm-core.h
===================================================================
--- linux-2.6.orig/drivers/md/dm-core.h	2024-05-15 16:56:49.000000000 +0200
+++ linux-2.6/drivers/md/dm-core.h	2024-05-15 16:56:49.000000000 +0200
@@ -206,7 +206,9 @@ struct dm_table {
 
 	bool integrity_supported:1;
 	bool singleton:1;
-	unsigned integrity_added:1;
+	bool integrity_added:1;
+	/* set if all the targets in the table have "flush_pass_around" set */
+	bool flush_pass_around:1;
 
 	/*
 	 * Indicates the rw permissions for the new logical device.  This
Index: linux-2.6/drivers/md/dm-linear.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-linear.c	2024-05-15 16:56:49.000000000 +0200
+++ linux-2.6/drivers/md/dm-linear.c	2024-05-15 16:56:49.000000000 +0200
@@ -62,6 +62,7 @@ static int linear_ctr(struct dm_target *
 	ti->num_discard_bios = 1;
 	ti->num_secure_erase_bios = 1;
 	ti->num_write_zeroes_bios = 1;
+	ti->flush_pass_around = true;
 	ti->private = lc;
 	return 0;
 
Index: linux-2.6/drivers/md/dm-stripe.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-stripe.c	2024-05-15 16:56:49.000000000 +0200
+++ linux-2.6/drivers/md/dm-stripe.c	2024-05-15 16:56:49.000000000 +0200
@@ -157,6 +157,7 @@ static int stripe_ctr(struct dm_target *
 	ti->num_discard_bios = stripes;
 	ti->num_secure_erase_bios = stripes;
 	ti->num_write_zeroes_bios = stripes;
+	ti->flush_pass_around = true;
 
 	sc->chunk_size = chunk_size;
 	if (chunk_size & (chunk_size - 1))
Index: linux-2.6/drivers/md/dm-table.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-table.c	2024-05-15 16:56:49.000000000 +0200
+++ linux-2.6/drivers/md/dm-table.c	2024-05-15 16:56:49.000000000 +0200
@@ -160,6 +160,7 @@ int dm_table_create(struct dm_table **re
 	t->type = DM_TYPE_NONE;
 	t->mode = mode;
 	t->md = md;
+	t->flush_pass_around = 1;
 	*result = t;
 	return 0;
 }
@@ -738,6 +739,9 @@ int dm_table_add_target(struct dm_table
 	if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
 		static_branch_enable(&swap_bios_enabled);
 
+	if (!ti->flush_pass_around)
+		t->flush_pass_around = false;
+
 	return 0;
 
  bad:
Index: linux-2.6/include/linux/device-mapper.h
===================================================================
--- linux-2.6.orig/include/linux/device-mapper.h	2024-05-15 16:56:49.000000000 +0200
+++ linux-2.6/include/linux/device-mapper.h	2024-05-15 16:56:49.000000000 +0200
@@ -397,6 +397,11 @@ struct dm_target {
 	 * bio_set_dev(). NOTE: ideally a target should _not_ need this.
 	 */
 	bool needs_bio_set_dev:1;
+
+	/*
+	 * Set if the target supports flush optimization
+	 */
+	bool flush_pass_around:1;
 };
 
 void *dm_per_bio_data(struct bio *bio, size_t data_size);
Index: linux-2.6/drivers/md/dm.c
===================================================================
--- linux-2.6.orig/drivers/md/dm.c	2024-05-15 16:56:49.000000000 +0200
+++ linux-2.6/drivers/md/dm.c	2024-05-16 20:06:32.000000000 +0200
@@ -645,7 +645,7 @@ static struct bio *alloc_tio(struct clon
 
 	/* Set default bdev, but target must bio_set_dev() before issuing IO */
 	clone->bi_bdev = md->disk->part0;
-	if (unlikely(ti->needs_bio_set_dev))
+	if (likely(ti != NULL) && unlikely(ti->needs_bio_set_dev))
 		bio_set_dev(clone, md->disk->part0);
 
 	if (len) {
@@ -1107,7 +1107,7 @@ static void clone_endio(struct bio *bio)
 	blk_status_t error = bio->bi_status;
 	struct dm_target_io *tio = clone_to_tio(bio);
 	struct dm_target *ti = tio->ti;
-	dm_endio_fn endio = ti->type->end_io;
+	dm_endio_fn endio = likely(ti != NULL) ? ti->type->end_io : NULL;
 	struct dm_io *io = tio->io;
 	struct mapped_device *md = io->md;
 
@@ -1154,7 +1154,7 @@ static void clone_endio(struct bio *bio)
 	}
 
 	if (static_branch_unlikely(&swap_bios_enabled) &&
-	    unlikely(swap_bios_limit(ti, bio)))
+	    likely(ti != NULL) && unlikely(swap_bios_limit(ti, bio)))
 		up(&md->swap_bios_semaphore);
 
 	free_tio(bio);
@@ -1566,17 +1566,36 @@ static void __send_empty_flush(struct cl
 	ci->sector_count = 0;
 	ci->io->tio.clone.bi_iter.bi_size = 0;
 
-	for (unsigned int i = 0; i < t->num_targets; i++) {
-		unsigned int bios;
-		struct dm_target *ti = dm_table_get_target(t, i);
-
-		if (unlikely(ti->num_flush_bios == 0))
-			continue;
-
-		atomic_add(ti->num_flush_bios, &ci->io->io_count);
-		bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
-					     NULL, GFP_NOWAIT);
-		atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
+	if (!t->flush_pass_around) {
+		for (unsigned int i = 0; i < t->num_targets; i++) {
+			unsigned int bios;
+			struct dm_target *ti = dm_table_get_target(t, i);
+
+			if (unlikely(ti->num_flush_bios == 0))
+				continue;
+
+			atomic_add(ti->num_flush_bios, &ci->io->io_count);
+			bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
+						     NULL, GFP_NOWAIT);
+			atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
+		}
+	} else {
+		/*
+		 * Note that there's no need to grab t->devices_lock here
+		 * because the targets that support flush pass-around don't
+		 * modify the list of devices.
+		 */
+		struct list_head *devices = dm_table_get_devices(t);
+		unsigned int len = 0;
+		struct dm_dev_internal *dd;
+		list_for_each_entry(dd, devices, list) {
+			struct bio *clone;
+			clone = alloc_tio(ci, NULL, 0, &len, GFP_NOIO);
+			atomic_add(1, &ci->io->io_count);
+			bio_set_dev(clone, dd->dm_dev->bdev);
+			clone->bi_end_io = clone_endio;
+			dm_submit_bio_remap(clone, NULL);
+		}
 	}
 
 	/*
Re: dm: optimize flushes
Posted by Mike Snitzer 1 year, 8 months ago
On Thu, May 16, 2024 at 10:49:55PM +0200, Mikulas Patocka wrote:
> Device mapper sends flush bios to all the targets and the targets send it
> to the underlying device. That may be inefficient, for example if a table
> contains 10 linear targets pointing to the same physical device, then
> device mapper would send 10 flush bios to that device - despite the fact
> that only one bio would be sufficient.
> 
> This commit optimizes the flush behavior. It introduces a per-target
> variable flush_pass_around - it is set when the target supports flush
> optimization - currently, the dm-linear and dm-stripe targets support it.
> When all the targets in a table have flush_pass_around, flush_pass_around
> on the table is set. __send_empty_flush tests if the table has
> flush_pass_around - and if it has, no flush bios are sent to the targets
> and the list dm_table->devices is iterated and the flush bios are sent to
> each member of the list.

What does "pass around" mean?  Seems like an awkward name for this.
(Naming can be hard, I don't have better suggestions at the moment.)

> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> Reported-by: Yang Yang <yang.yang@vivo.com>
> 
> ---
>  drivers/md/dm-core.h          |    4 ++-
>  drivers/md/dm-linear.c        |    1 
>  drivers/md/dm-stripe.c        |    1 
>  drivers/md/dm-table.c         |    4 +++
>  drivers/md/dm.c               |   47 +++++++++++++++++++++++++++++-------------
>  include/linux/device-mapper.h |    5 ++++
>  6 files changed, 47 insertions(+), 15 deletions(-)
> 
> Index: linux-2.6/drivers/md/dm-core.h
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-core.h	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm-core.h	2024-05-15 16:56:49.000000000 +0200
> @@ -206,7 +206,9 @@ struct dm_table {
>  
>  	bool integrity_supported:1;
>  	bool singleton:1;
> -	unsigned integrity_added:1;
> +	bool integrity_added:1;
> +	/* set if all the targets in the table have "flush_pass_around" set */
> +	bool flush_pass_around:1;
>  
>  	/*
>  	 * Indicates the rw permissions for the new logical device.  This
> Index: linux-2.6/drivers/md/dm-linear.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-linear.c	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm-linear.c	2024-05-15 16:56:49.000000000 +0200
> @@ -62,6 +62,7 @@ static int linear_ctr(struct dm_target *
>  	ti->num_discard_bios = 1;
>  	ti->num_secure_erase_bios = 1;
>  	ti->num_write_zeroes_bios = 1;
> +	ti->flush_pass_around = true;
>  	ti->private = lc;
>  	return 0;
>  
> Index: linux-2.6/drivers/md/dm-stripe.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-stripe.c	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm-stripe.c	2024-05-15 16:56:49.000000000 +0200
> @@ -157,6 +157,7 @@ static int stripe_ctr(struct dm_target *
>  	ti->num_discard_bios = stripes;
>  	ti->num_secure_erase_bios = stripes;
>  	ti->num_write_zeroes_bios = stripes;
> +	ti->flush_pass_around = true;
>  
>  	sc->chunk_size = chunk_size;
>  	if (chunk_size & (chunk_size - 1))
> Index: linux-2.6/drivers/md/dm-table.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-table.c	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm-table.c	2024-05-15 16:56:49.000000000 +0200
> @@ -160,6 +160,7 @@ int dm_table_create(struct dm_table **re
>  	t->type = DM_TYPE_NONE;
>  	t->mode = mode;
>  	t->md = md;
> +	t->flush_pass_around = 1;
>  	*result = t;
>  	return 0;
>  }

Should be: t->flush_pass_around = true;

> @@ -738,6 +739,9 @@ int dm_table_add_target(struct dm_table
>  	if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
>  		static_branch_enable(&swap_bios_enabled);
>  
> +	if (!ti->flush_pass_around)
> +		t->flush_pass_around = false;
> +
>  	return 0;
>  
>   bad:
> Index: linux-2.6/include/linux/device-mapper.h
> ===================================================================
> --- linux-2.6.orig/include/linux/device-mapper.h	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/include/linux/device-mapper.h	2024-05-15 16:56:49.000000000 +0200
> @@ -397,6 +397,11 @@ struct dm_target {
>  	 * bio_set_dev(). NOTE: ideally a target should _not_ need this.
>  	 */
>  	bool needs_bio_set_dev:1;
> +
> +	/*
> +	 * Set if the target supports flush optimization
> +	 */
> +	bool flush_pass_around:1;
>  };

How does a developer _know_ if a target can set this flag?  Please
elaborate on the requirements in this code comment.

>  
>  void *dm_per_bio_data(struct bio *bio, size_t data_size);
> Index: linux-2.6/drivers/md/dm.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm.c	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm.c	2024-05-16 20:06:32.000000000 +0200
> @@ -645,7 +645,7 @@ static struct bio *alloc_tio(struct clon
>  
>  	/* Set default bdev, but target must bio_set_dev() before issuing IO */
>  	clone->bi_bdev = md->disk->part0;
> -	if (unlikely(ti->needs_bio_set_dev))
> +	if (likely(ti != NULL) && unlikely(ti->needs_bio_set_dev))
>  		bio_set_dev(clone, md->disk->part0);
>  
>  	if (len) {
> @@ -1107,7 +1107,7 @@ static void clone_endio(struct bio *bio)
>  	blk_status_t error = bio->bi_status;
>  	struct dm_target_io *tio = clone_to_tio(bio);
>  	struct dm_target *ti = tio->ti;
> -	dm_endio_fn endio = ti->type->end_io;
> +	dm_endio_fn endio = likely(ti != NULL) ? ti->type->end_io : NULL;
>  	struct dm_io *io = tio->io;
>  	struct mapped_device *md = io->md;
>  
> @@ -1154,7 +1154,7 @@ static void clone_endio(struct bio *bio)
>  	}
>  
>  	if (static_branch_unlikely(&swap_bios_enabled) &&
> -	    unlikely(swap_bios_limit(ti, bio)))
> +	    likely(ti != NULL) && unlikely(swap_bios_limit(ti, bio)))
>  		up(&md->swap_bios_semaphore);
>  
>  	free_tio(bio);

What is it about this commit that makes it important to verify ti
isn't NULL in the above 3 hunks?

Should these NULL checks be factored out as a separate fix?

Or can these hunks be dropped?

> @@ -1566,17 +1566,36 @@ static void __send_empty_flush(struct cl
>  	ci->sector_count = 0;
>  	ci->io->tio.clone.bi_iter.bi_size = 0;
>  
> -	for (unsigned int i = 0; i < t->num_targets; i++) {
> -		unsigned int bios;
> -		struct dm_target *ti = dm_table_get_target(t, i);
> -
> -		if (unlikely(ti->num_flush_bios == 0))
> -			continue;
> -
> -		atomic_add(ti->num_flush_bios, &ci->io->io_count);
> -		bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
> -					     NULL, GFP_NOWAIT);
> -		atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
> +	if (!t->flush_pass_around) {
> +		for (unsigned int i = 0; i < t->num_targets; i++) {
> +			unsigned int bios;
> +			struct dm_target *ti = dm_table_get_target(t, i);
> +
> +			if (unlikely(ti->num_flush_bios == 0))
> +				continue;
> +
> +			atomic_add(ti->num_flush_bios, &ci->io->io_count);
> +			bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
> +						     NULL, GFP_NOWAIT);
> +			atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
> +		}
> +	} else {
> +		/*
> +		 * Note that there's no need to grab t->devices_lock here
> +		 * because the targets that support flush pass-around don't
> +		 * modify the list of devices.
> +		 */
> +		struct list_head *devices = dm_table_get_devices(t);
> +		unsigned int len = 0;
> +		struct dm_dev_internal *dd;
> +		list_for_each_entry(dd, devices, list) {
> +			struct bio *clone;
> +			clone = alloc_tio(ci, NULL, 0, &len, GFP_NOIO);
> +			atomic_add(1, &ci->io->io_count);
> +			bio_set_dev(clone, dd->dm_dev->bdev);
> +			clone->bi_end_io = clone_endio;
> +			dm_submit_bio_remap(clone, NULL);
> +		}
>  	}
>  
>  	/*
> 
> 

Still missing what "pass-around" is meant to convey given that you
aren't passing around the same flush... you're cloning a new flush and
issuing one per device.  Probably worth explaining that's what you
mean by "flush_pass_around" (both in commit header and elaborate in
code)?

Also, you're issuing a flush to _all_ devices in a table. Not just
the data devices.  .iterate_devices returns only the data devices.
If/when there is a need to extend this feature to targets that have
metadata devices (e.g. dm-thin, cache, etc): would it make sense to
filter out non-data devices (by stepping through each target in the
table and using iterate_devices)?

Mike
Re: dm: optimize flushes
Posted by Eric Wheeler 1 year, 8 months ago
On Wed, 22 May 2024, Mike Snitzer wrote:
> On Thu, May 16, 2024 at 10:49:55PM +0200, Mikulas Patocka wrote:
> > Device mapper sends flush bios to all the targets and the targets send it
> > to the underlying device. That may be inefficient, for example if a table
> > contains 10 linear targets pointing to the same physical device, then
> > device mapper would send 10 flush bios to that device - despite the fact
> > that only one bio would be sufficient.
> > 
> > This commit optimizes the flush behavior. It introduces a per-target
> > variable flush_pass_around - it is set when the target supports flush
> > optimization - currently, the dm-linear and dm-stripe targets support it.
> > When all the targets in a table have flush_pass_around, flush_pass_around
> > on the table is set. __send_empty_flush tests if the table has
> > flush_pass_around - and if it has, no flush bios are sent to the targets
> > and the list dm_table->devices is iterated and the flush bios are sent to
> > each member of the list.
> 
> What does "pass around" mean?  Seems like an awkward name for this.
> (Naming can be hard, I don't have better suggestions at the moment.)

Just playing with naming ideas from other disciplines in case you like
one of these concepts better than "pass around".  I'm not attached to any of
these; this is just for making conversation so the flag can be easily
understood:

	- flush_can_scatter (as in scatter/gather)
	- flush_can_distribute
	- flush_can_spread
	- flush_deduplicate
	
...

> > Index: linux-2.6/include/linux/device-mapper.h
> > ===================================================================
> > --- linux-2.6.orig/include/linux/device-mapper.h	2024-05-15 16:56:49.000000000 +0200
> > +++ linux-2.6/include/linux/device-mapper.h	2024-05-15 16:56:49.000000000 +0200
> > @@ -397,6 +397,11 @@ struct dm_target {
> >  	 * bio_set_dev(). NOTE: ideally a target should _not_ need this.
> >  	 */
> >  	bool needs_bio_set_dev:1;
> > +
> > +	/*
> > +	 * Set if the target supports flush optimization
> > +	 */
> > +	bool flush_pass_around:1;
> >  };
> 
> How does a developer _know_ if a target can set this flag?  Please
> elaborate on the requirements in this code comment.

Relatedly,

To what extent can this be set automatically? For example, if you have a 
bunch of non-DM (eg, SCSI) disks under a device mapper target, then it 
seems reasonable that they would "support" this feature in the identity 
sense: they can take flush and it will (should) not be spread to other 
devices in the DM stack, so the device mapper targets being instantiated 
in such a case would enable this flag.  Thus, a new target that only has
non-DM devices can (probably?) default to enabled; maybe there are
counterexamples here.

Another consideration is for targets (eg, dm-thinpool) which have multiple 
lower-level block devices on the same table definition line, often a data 
and metadata volume. By contrast, linear tables may have multiple backing 
devices in separate target table lines.  This may be further complicated 
by the fact that a device mapper target can be composed of multiple 
disparate targets as separate table lines, each of which may have a 
different number of backing devices for their own definition.

Perhaps your design already covers these edge cases, so I am only 
mentioning this in case it may prompt ideas for other edge cases 
to review.


--
Eric Wheeler



> 
> >  
> >  void *dm_per_bio_data(struct bio *bio, size_t data_size);
> > Index: linux-2.6/drivers/md/dm.c
> > ===================================================================
> > --- linux-2.6.orig/drivers/md/dm.c	2024-05-15 16:56:49.000000000 +0200
> > +++ linux-2.6/drivers/md/dm.c	2024-05-16 20:06:32.000000000 +0200
> > @@ -645,7 +645,7 @@ static struct bio *alloc_tio(struct clon
> >  
> >  	/* Set default bdev, but target must bio_set_dev() before issuing IO */
> >  	clone->bi_bdev = md->disk->part0;
> > -	if (unlikely(ti->needs_bio_set_dev))
> > +	if (likely(ti != NULL) && unlikely(ti->needs_bio_set_dev))
> >  		bio_set_dev(clone, md->disk->part0);
> >  
> >  	if (len) {
> > @@ -1107,7 +1107,7 @@ static void clone_endio(struct bio *bio)
> >  	blk_status_t error = bio->bi_status;
> >  	struct dm_target_io *tio = clone_to_tio(bio);
> >  	struct dm_target *ti = tio->ti;
> > -	dm_endio_fn endio = ti->type->end_io;
> > +	dm_endio_fn endio = likely(ti != NULL) ? ti->type->end_io : NULL;
> >  	struct dm_io *io = tio->io;
> >  	struct mapped_device *md = io->md;
> >  
> > @@ -1154,7 +1154,7 @@ static void clone_endio(struct bio *bio)
> >  	}
> >  
> >  	if (static_branch_unlikely(&swap_bios_enabled) &&
> > -	    unlikely(swap_bios_limit(ti, bio)))
> > +	    likely(ti != NULL) && unlikely(swap_bios_limit(ti, bio)))
> >  		up(&md->swap_bios_semaphore);
> >  
> >  	free_tio(bio);
> 
> What is it about this commit that makes it important to verify ti
> isn't NULL in the above 3 hunks?
> 
> Should these NULL checks be factored out as a separate fix?
> 
> Or can these hunks be dropped?
> 
> > @@ -1566,17 +1566,36 @@ static void __send_empty_flush(struct cl
> >  	ci->sector_count = 0;
> >  	ci->io->tio.clone.bi_iter.bi_size = 0;
> >  
> > -	for (unsigned int i = 0; i < t->num_targets; i++) {
> > -		unsigned int bios;
> > -		struct dm_target *ti = dm_table_get_target(t, i);
> > -
> > -		if (unlikely(ti->num_flush_bios == 0))
> > -			continue;
> > -
> > -		atomic_add(ti->num_flush_bios, &ci->io->io_count);
> > -		bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
> > -					     NULL, GFP_NOWAIT);
> > -		atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
> > +	if (!t->flush_pass_around) {
> > +		for (unsigned int i = 0; i < t->num_targets; i++) {
> > +			unsigned int bios;
> > +			struct dm_target *ti = dm_table_get_target(t, i);
> > +
> > +			if (unlikely(ti->num_flush_bios == 0))
> > +				continue;
> > +
> > +			atomic_add(ti->num_flush_bios, &ci->io->io_count);
> > +			bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
> > +						     NULL, GFP_NOWAIT);
> > +			atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
> > +		}
> > +	} else {
> > +		/*
> > +		 * Note that there's no need to grab t->devices_lock here
> > +		 * because the targets that support flush pass-around don't
> > +		 * modify the list of devices.
> > +		 */
> > +		struct list_head *devices = dm_table_get_devices(t);
> > +		unsigned int len = 0;
> > +		struct dm_dev_internal *dd;
> > +		list_for_each_entry(dd, devices, list) {
> > +			struct bio *clone;
> > +			clone = alloc_tio(ci, NULL, 0, &len, GFP_NOIO);
> > +			atomic_add(1, &ci->io->io_count);
> > +			bio_set_dev(clone, dd->dm_dev->bdev);
> > +			clone->bi_end_io = clone_endio;
> > +			dm_submit_bio_remap(clone, NULL);
> > +		}
> >  	}
> >  
> >  	/*
> > 
> > 
> 
> Still missing what "pass-around" is meant to convey given that you
> aren't passing around the same flush... you're cloning a new flush and
> issuing one per device.  Probably worth explaining that's what you
> mean by "flush_pass_around" (both in commit header and elaborate in
> code)?
> 
> Also, you're issuing a flush to _all_ devices in a table. Not just
> the data devices.  .iterate_devices returns only the data devices.
> If/when there is a need to extend this feature to targets that have
> metadata devices (e.g. dm-thin, cache, etc): would it make sense to
> filter out non-data devices (by stepping through each target in the
> table and using iterate_devices)?
> 
> Mike
> 
>
Re: dm: optimize flushes
Posted by Mikulas Patocka 1 year, 8 months ago

On Wed, 22 May 2024, Mike Snitzer wrote:

> On Thu, May 16, 2024 at 10:49:55PM +0200, Mikulas Patocka wrote:
> > Device mapper sends flush bios to all the targets and the targets send it
> > to the underlying device. That may be inefficient, for example if a table
> > contains 10 linear targets pointing to the same physical device, then
> > device mapper would send 10 flush bios to that device - despite the fact
> > that only one bio would be sufficient.
> > 
> > This commit optimizes the flush behavior. It introduces a per-target
> > variable flush_pass_around - it is set when the target supports flush
> > optimization - currently, the dm-linear and dm-stripe targets support it.
> > When all the targets in a table have flush_pass_around, flush_pass_around
> > on the table is set. __send_empty_flush tests if the table has
> > flush_pass_around - and if it has, no flush bios are sent to the targets
> > and the list dm_table->devices is iterated and the flush bios are sent to
> > each member of the list.
> 
> What does "pass around" mean?  Seems like an awkward name for this.
> (Naming can be hard, I don't have better suggestions at the moment.)

What about "flush_bypass" or "flush_bypasses_map"?

> > Index: linux-2.6/drivers/md/dm-table.c
> > ===================================================================
> > --- linux-2.6.orig/drivers/md/dm-table.c	2024-05-15 16:56:49.000000000 +0200
> > +++ linux-2.6/drivers/md/dm-table.c	2024-05-15 16:56:49.000000000 +0200
> > @@ -160,6 +160,7 @@ int dm_table_create(struct dm_table **re
> >  	t->type = DM_TYPE_NONE;
> >  	t->mode = mode;
> >  	t->md = md;
> > +	t->flush_pass_around = 1;
> >  	*result = t;
> >  	return 0;
> >  }
> 
> Should be: t->flush_pass_around = true;

Yes.

> > +
> > +	/*
> > +	 * Set if the target supports flush optimization
> > +	 */
> > +	bool flush_pass_around:1;
> >  };
> 
> How does a developer _know_ if a target can set this flag?  Please
> elaborate on the requirements in this code comment.

What about:

"The target supports flush optimization. When all the targets in the table 
support flush optimization, flushes will not use the "map" method and they 
will be sent directly to all the devices in the table. This optimization 
reduces the number of flushes that are being sent if multiple targets use 
the same underlying device."

> >  
> >  void *dm_per_bio_data(struct bio *bio, size_t data_size);
> > Index: linux-2.6/drivers/md/dm.c
> > ===================================================================
> > --- linux-2.6.orig/drivers/md/dm.c	2024-05-15 16:56:49.000000000 +0200
> > +++ linux-2.6/drivers/md/dm.c	2024-05-16 20:06:32.000000000 +0200
> > @@ -645,7 +645,7 @@ static struct bio *alloc_tio(struct clon
> >  
> >  	/* Set default bdev, but target must bio_set_dev() before issuing IO */
> >  	clone->bi_bdev = md->disk->part0;
> > -	if (unlikely(ti->needs_bio_set_dev))
> > +	if (likely(ti != NULL) && unlikely(ti->needs_bio_set_dev))
> >  		bio_set_dev(clone, md->disk->part0);
> >  
> >  	if (len) {
> > @@ -1107,7 +1107,7 @@ static void clone_endio(struct bio *bio)
> >  	blk_status_t error = bio->bi_status;
> >  	struct dm_target_io *tio = clone_to_tio(bio);
> >  	struct dm_target *ti = tio->ti;
> > -	dm_endio_fn endio = ti->type->end_io;
> > +	dm_endio_fn endio = likely(ti != NULL) ? ti->type->end_io : NULL;
> >  	struct dm_io *io = tio->io;
> >  	struct mapped_device *md = io->md;
> >  
> > @@ -1154,7 +1154,7 @@ static void clone_endio(struct bio *bio)
> >  	}
> >  
> >  	if (static_branch_unlikely(&swap_bios_enabled) &&
> > -	    unlikely(swap_bios_limit(ti, bio)))
> > +	    likely(ti != NULL) && unlikely(swap_bios_limit(ti, bio)))
> >  		up(&md->swap_bios_semaphore);
> >  
> >  	free_tio(bio);
> 
> What is it about this commit that makes it important to verify ti
> isn't NULL in the above 3 hunks?
> 
> Should these NULL checks be factored out as a separate fix?
> 
> Or can these hunks be dropped?

They can't be dropped.

When performing the flush bypass optimization, the dm core creates a 
dm_target_io structure that isn't associated with any specific target. So, 
the pointer "tio->ti" is NULL.

I could set "tio->ti" to any target, but I think it's better to set it to 
NULL, just to mark that there is no target association.

> > @@ -1566,17 +1566,36 @@ static void __send_empty_flush(struct cl
> >  	ci->sector_count = 0;
> >  	ci->io->tio.clone.bi_iter.bi_size = 0;
> >  
> > -	for (unsigned int i = 0; i < t->num_targets; i++) {
> > -		unsigned int bios;
> > -		struct dm_target *ti = dm_table_get_target(t, i);
> > -
> > -		if (unlikely(ti->num_flush_bios == 0))
> > -			continue;
> > -
> > -		atomic_add(ti->num_flush_bios, &ci->io->io_count);
> > -		bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
> > -					     NULL, GFP_NOWAIT);
> > -		atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
> > +	if (!t->flush_pass_around) {
> > +		for (unsigned int i = 0; i < t->num_targets; i++) {
> > +			unsigned int bios;
> > +			struct dm_target *ti = dm_table_get_target(t, i);
> > +
> > +			if (unlikely(ti->num_flush_bios == 0))
> > +				continue;
> > +
> > +			atomic_add(ti->num_flush_bios, &ci->io->io_count);
> > +			bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
> > +						     NULL, GFP_NOWAIT);
> > +			atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
> > +		}
> > +	} else {
> > +		/*
> > +		 * Note that there's no need to grab t->devices_lock here
> > +		 * because the targets that support flush pass-around don't
> > +		 * modify the list of devices.
> > +		 */
> > +		struct list_head *devices = dm_table_get_devices(t);
> > +		unsigned int len = 0;
> > +		struct dm_dev_internal *dd;
> > +		list_for_each_entry(dd, devices, list) {
> > +			struct bio *clone;
> > +			clone = alloc_tio(ci, NULL, 0, &len, GFP_NOIO);
                                              ^^^^
Here we set tio->ti to NULL.

> > +			atomic_add(1, &ci->io->io_count);
> > +			bio_set_dev(clone, dd->dm_dev->bdev);
> > +			clone->bi_end_io = clone_endio;
> > +			dm_submit_bio_remap(clone, NULL);
> > +		}
> >  	}
> >  
> >  	/*
> > 
> > 
> 
> Still missing what "pass-around" is meant to convey given that you
> aren't passing around the same flush... you're cloning a new flush and
> issuing one per device.  Probably worth explaining that's what you
> mean by "flush_pass_around" (both in commit header and elaborate in
> code)?

I mean that flushes bypass the map method.

> Also, you're issuing a flush to _all_ devices in a table. Not just
> the data devices.  .iterate_devices returns only the data devices.
> If/when there is a need to extend this feature to targets that have
> metadata devices (e.g. dm-thin, cache, etc): would it make sense to
> filter out non-data devices (by stepping through each target in the
> table and using iterate_devices)?

This optimization only makes sense if there are multiple targets in the
table. dm-thin, dm-cache or dm-raid is usually the only target in the table,
so the optimization doesn't make sense for them. Trying to support the
"flush bypass" optimization for them would bloat the code without reducing 
the number of flush requests at all.

> Mike

Mikulas
[PATCH v2] dm: optimize flushes
Posted by Mikulas Patocka 1 year, 8 months ago
Hi

Here I'm resending the patch, with more comments and explanations added.

Mikulas


From: Mikulas Patocka <mpatocka@redhat.com>

Device mapper sends flush bios to all the targets and the targets send them
to the underlying devices. That may be inefficient, for example if a table
contains 10 linear targets pointing to the same physical device, then
device mapper would send 10 flush bios to that device - despite the fact
that only one bio would be sufficient.

This commit optimizes the flush behavior. It introduces a per-target
variable flush_bypasses_map - it is set when the target supports flush
optimization - currently, the dm-linear and dm-stripe targets support it.
When all the targets in a table have flush_bypasses_map,
flush_bypasses_map on the table is set. __send_empty_flush tests if the
table has flush_bypasses_map - and if it has, no flush bios are sent to
the targets via the "map" method and the list dm_table->devices is
iterated and the flush bios are sent to each member of the list.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Suggested-by: Yang Yang <yang.yang@vivo.com>

---
 drivers/md/dm-core.h          |    4 ++-
 drivers/md/dm-linear.c        |    1 
 drivers/md/dm-stripe.c        |    1 
 drivers/md/dm-table.c         |    4 +++
 drivers/md/dm.c               |   54 +++++++++++++++++++++++++++++++-----------
 include/linux/device-mapper.h |   15 +++++++++++
 6 files changed, 64 insertions(+), 15 deletions(-)

Index: linux-2.6/drivers/md/dm-core.h
===================================================================
--- linux-2.6.orig/drivers/md/dm-core.h	2024-05-23 19:00:00.000000000 +0200
+++ linux-2.6/drivers/md/dm-core.h	2024-05-23 19:00:00.000000000 +0200
@@ -206,7 +206,9 @@ struct dm_table {
 
 	bool integrity_supported:1;
 	bool singleton:1;
-	unsigned integrity_added:1;
+	bool integrity_added:1;
+	/* set if all the targets in the table have "flush_bypasses_map" set */
+	bool flush_bypasses_map:1;
 
 	/*
 	 * Indicates the rw permissions for the new logical device.  This
Index: linux-2.6/drivers/md/dm-linear.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-linear.c	2024-05-23 19:00:00.000000000 +0200
+++ linux-2.6/drivers/md/dm-linear.c	2024-05-23 19:00:00.000000000 +0200
@@ -62,6 +62,7 @@ static int linear_ctr(struct dm_target *
 	ti->num_discard_bios = 1;
 	ti->num_secure_erase_bios = 1;
 	ti->num_write_zeroes_bios = 1;
+	ti->flush_bypasses_map = true;
 	ti->private = lc;
 	return 0;
 
Index: linux-2.6/drivers/md/dm-stripe.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-stripe.c	2024-05-23 19:00:00.000000000 +0200
+++ linux-2.6/drivers/md/dm-stripe.c	2024-05-23 19:00:00.000000000 +0200
@@ -157,6 +157,7 @@ static int stripe_ctr(struct dm_target *
 	ti->num_discard_bios = stripes;
 	ti->num_secure_erase_bios = stripes;
 	ti->num_write_zeroes_bios = stripes;
+	ti->flush_bypasses_map = true;
 
 	sc->chunk_size = chunk_size;
 	if (chunk_size & (chunk_size - 1))
Index: linux-2.6/drivers/md/dm-table.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-table.c	2024-05-23 19:00:00.000000000 +0200
+++ linux-2.6/drivers/md/dm-table.c	2024-05-23 19:00:00.000000000 +0200
@@ -160,6 +160,7 @@ int dm_table_create(struct dm_table **re
 	t->type = DM_TYPE_NONE;
 	t->mode = mode;
 	t->md = md;
+	t->flush_bypasses_map = true;
 	*result = t;
 	return 0;
 }
@@ -738,6 +739,9 @@ int dm_table_add_target(struct dm_table
 	if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
 		static_branch_enable(&swap_bios_enabled);
 
+	if (!ti->flush_bypasses_map)
+		t->flush_bypasses_map = false;
+
 	return 0;
 
  bad:
Index: linux-2.6/include/linux/device-mapper.h
===================================================================
--- linux-2.6.orig/include/linux/device-mapper.h	2024-05-23 19:00:00.000000000 +0200
+++ linux-2.6/include/linux/device-mapper.h	2024-05-23 19:18:01.000000000 +0200
@@ -397,6 +397,21 @@ struct dm_target {
 	 * bio_set_dev(). NOTE: ideally a target should _not_ need this.
 	 */
 	bool needs_bio_set_dev:1;
+
+	/*
+	 * Set if the target supports flush optimization. If all the targets in
+	 * a table have flush_bypasses_map set, the dm core will not send
+	 * flushes to the targets via a ->map method. It will iterate over
+	 * dm_table->devices and send flushes to the devices directly. This
+	 * optimization reduces the number of flushes being sent when multiple
+	 * targets in a table use the same underlying device.
+	 *
+	 * This optimization may be enabled on targets that just pass the
+	 * flushes to the underlying devices without performing any other
+	 * actions on the flush request. Currently, dm-linear and dm-stripe
+	 * support it.
+	 */
+	bool flush_bypasses_map:1;
 };
 
 void *dm_per_bio_data(struct bio *bio, size_t data_size);
Index: linux-2.6/drivers/md/dm.c
===================================================================
--- linux-2.6.orig/drivers/md/dm.c	2024-05-23 19:00:00.000000000 +0200
+++ linux-2.6/drivers/md/dm.c	2024-05-23 19:31:14.000000000 +0200
@@ -645,7 +645,7 @@ static struct bio *alloc_tio(struct clon
 
 	/* Set default bdev, but target must bio_set_dev() before issuing IO */
 	clone->bi_bdev = md->disk->part0;
-	if (unlikely(ti->needs_bio_set_dev))
+	if (likely(ti != NULL) && unlikely(ti->needs_bio_set_dev))
 		bio_set_dev(clone, md->disk->part0);
 
 	if (len) {
@@ -1107,7 +1107,7 @@ static void clone_endio(struct bio *bio)
 	blk_status_t error = bio->bi_status;
 	struct dm_target_io *tio = clone_to_tio(bio);
 	struct dm_target *ti = tio->ti;
-	dm_endio_fn endio = ti->type->end_io;
+	dm_endio_fn endio = likely(ti != NULL) ? ti->type->end_io : NULL;
 	struct dm_io *io = tio->io;
 	struct mapped_device *md = io->md;
 
@@ -1154,7 +1154,7 @@ static void clone_endio(struct bio *bio)
 	}
 
 	if (static_branch_unlikely(&swap_bios_enabled) &&
-	    unlikely(swap_bios_limit(ti, bio)))
+	    likely(ti != NULL) && unlikely(swap_bios_limit(ti, bio)))
 		up(&md->swap_bios_semaphore);
 
 	free_tio(bio);
@@ -1566,17 +1566,43 @@ static void __send_empty_flush(struct cl
 	ci->sector_count = 0;
 	ci->io->tio.clone.bi_iter.bi_size = 0;
 
-	for (unsigned int i = 0; i < t->num_targets; i++) {
-		unsigned int bios;
-		struct dm_target *ti = dm_table_get_target(t, i);
-
-		if (unlikely(ti->num_flush_bios == 0))
-			continue;
-
-		atomic_add(ti->num_flush_bios, &ci->io->io_count);
-		bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
-					     NULL, GFP_NOWAIT);
-		atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
+	if (!t->flush_bypasses_map) {
+		for (unsigned int i = 0; i < t->num_targets; i++) {
+			unsigned int bios;
+			struct dm_target *ti = dm_table_get_target(t, i);
+
+			if (unlikely(ti->num_flush_bios == 0))
+				continue;
+
+			atomic_add(ti->num_flush_bios, &ci->io->io_count);
+			bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
+						     NULL, GFP_NOWAIT);
+			atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
+		}
+	} else {
+		/*
+		 * Note that there's no need to grab t->devices_lock here
+		 * because the targets that support flush optimization don't
+		 * modify the list of devices.
+		 */
+		struct list_head *devices = dm_table_get_devices(t);
+		unsigned int len = 0;
+		struct dm_dev_internal *dd;
+		list_for_each_entry(dd, devices, list) {
+			struct bio *clone;
+			/*
+			 * Note that the structure dm_target_io is not
+			 * associated with any target (because the device may be
+			 * used by multiple targets), so we set tio->ti = NULL.
+			 * We must check for NULL in the I/O processing path, to
+			 * avoid NULL pointer dereference.
+			 */
+			clone = alloc_tio(ci, NULL, 0, &len, GFP_NOIO);
+			atomic_add(1, &ci->io->io_count);
+			bio_set_dev(clone, dd->dm_dev->bdev);
+			clone->bi_end_io = clone_endio;
+			dm_submit_bio_remap(clone, NULL);
+		}
 	}
 
 	/*
Re: [PATCH v2] dm: optimize flushes
Posted by Mike Snitzer 1 year, 8 months ago
On Thu, May 23, 2024 at 07:46:25PM +0200, Mikulas Patocka wrote:
> Hi
> 
> Here I'm resending the patch, with more comments and explanations added.
> 
> Mikulas
> 
> 
> From: Mikulas Patocka <mpatocka@redhat.com>
> 
> Device mapper sends flush bios to all the targets and the targets send it
> to the underlying device. That may be inefficient, for example if a table
> contains 10 linear targets pointing to the same physical device, then
> device mapper would send 10 flush bios to that device - despite the fact
> that only one bio would be sufficient.
> 
> This commit optimizes the flush behavior. It introduces a per-target
> variable flush_bypasses_map - it is set when the target supports flush
> optimization - currently, the dm-linear and dm-stripe targets support it.
> When all the targets in a table have flush_bypasses_map,
> flush_bypasses_map on the table is set. __send_empty_flush tests if the
> table has flush_bypasses_map - and if it has, no flush bios are sent to
> the targets via the "map" method and the list dm_table->devices is
> iterated and the flush bios are sent to each member of the list.
> 
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> Suggested-by: Yang Yang <yang.yang@vivo.com>

Nicely done, please feel free to stage for 6.11 (create a new
'dm-6.11' branch starting from 'dm-6.10' -- we'll need to rebase
dm-6.11 to 6.10-rc2 or so but at least we'll get this commit in the
pipeline, push to 'for-next').

Reviewed-by: Mike Snitzer <snitzer@kernel.org>
Re: [PATCH v2] dm: optimize flushes
Posted by Mikulas Patocka 1 year, 8 months ago

On Thu, 23 May 2024, Mike Snitzer wrote:

> On Thu, May 23, 2024 at 07:46:25PM +0200, Mikulas Patocka wrote:
> > Hi
> > 
> > Here I'm resending the patch, with more comments and explanations added.
> > 
> > Mikulas
> > 
> > 
> > From: Mikulas Patocka <mpatocka@redhat.com>
> > 
> > Device mapper sends flush bios to all the targets and the targets send it
> > to the underlying device. That may be inefficient, for example if a table
> > contains 10 linear targets pointing to the same physical device, then
> > device mapper would send 10 flush bios to that device - despite the fact
> > that only one bio would be sufficient.
> > 
> > This commit optimizes the flush behavior. It introduces a per-target
> > variable flush_bypasses_map - it is set when the target supports flush
> > optimization - currently, the dm-linear and dm-stripe targets support it.
> > When all the targets in a table have flush_bypasses_map,
> > flush_bypasses_map on the table is set. __send_empty_flush tests if the
> > table has flush_bypasses_map - and if it has, no flush bios are sent to
> > the targets via the "map" method and the list dm_table->devices is
> > iterated and the flush bios are sent to each member of the list.
> > 
> > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> > Suggested-by: Yang Yang <yang.yang@vivo.com>
> 
> Nicely done, please feel free to stage for 6.11 (create a new
> 'dm-6.11' branch starting from 'dm-6.10' -- we'll need to rebase
> dm-6.11 to 6.10-rc2 or so but at least we'll get this commit in the
> pipeline, push to 'for-next').
> 
> Reviewed-by: Mike Snitzer <snitzer@kernel.org>

OK, done.

Mikulas
Re: [PATCH] dm: optimize flushes
Posted by YangYang 1 year, 8 months ago
On 2024/5/17 4:49, Mikulas Patocka wrote:
> Device mapper sends flush bios to all the targets and the targets send it
> to the underlying device. That may be inefficient, for example if a table
> contains 10 linear targets pointing to the same physical device, then
> device mapper would send 10 flush bios to that device - despite the fact
> that only one bio would be sufficient.
> 
> This commit optimizes the flush behavior. It introduces a per-target
> variable flush_pass_around - it is set when the target supports flush
> optimization - currently, the dm-linear and dm-stripe targets support it.
> When all the targets in a table have flush_pass_around, flush_pass_around
> on the table is set. __send_empty_flush tests if the table has
> flush_pass_around - and if it has, no flush bios are sent to the targets
> and the list dm_table->devices is iterated and the flush bios are sent to
> each member of the list.
> 
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> Reported-by: Yang Yang <yang.yang@vivo.com>
> 
> ---
>   drivers/md/dm-core.h          |    4 ++-
>   drivers/md/dm-linear.c        |    1
>   drivers/md/dm-stripe.c        |    1
>   drivers/md/dm-table.c         |    4 +++
>   drivers/md/dm.c               |   47 +++++++++++++++++++++++++++++-------------
>   include/linux/device-mapper.h |    5 ++++
>   6 files changed, 47 insertions(+), 15 deletions(-)
> 
> Index: linux-2.6/drivers/md/dm-core.h
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-core.h	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm-core.h	2024-05-15 16:56:49.000000000 +0200
> @@ -206,7 +206,9 @@ struct dm_table {
>   
>   	bool integrity_supported:1;
>   	bool singleton:1;
> -	unsigned integrity_added:1;
> +	bool integrity_added:1;
> +	/* set if all the targets in the table have "flush_pass_around" set */
> +	bool flush_pass_around:1;
>   
>   	/*
>   	 * Indicates the rw permissions for the new logical device.  This
> Index: linux-2.6/drivers/md/dm-linear.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-linear.c	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm-linear.c	2024-05-15 16:56:49.000000000 +0200
> @@ -62,6 +62,7 @@ static int linear_ctr(struct dm_target *
>   	ti->num_discard_bios = 1;
>   	ti->num_secure_erase_bios = 1;
>   	ti->num_write_zeroes_bios = 1;
> +	ti->flush_pass_around = true;
>   	ti->private = lc;
>   	return 0;
>   
> Index: linux-2.6/drivers/md/dm-stripe.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-stripe.c	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm-stripe.c	2024-05-15 16:56:49.000000000 +0200
> @@ -157,6 +157,7 @@ static int stripe_ctr(struct dm_target *
>   	ti->num_discard_bios = stripes;
>   	ti->num_secure_erase_bios = stripes;
>   	ti->num_write_zeroes_bios = stripes;
> +	ti->flush_pass_around = true;
>   
>   	sc->chunk_size = chunk_size;
>   	if (chunk_size & (chunk_size - 1))
> Index: linux-2.6/drivers/md/dm-table.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-table.c	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm-table.c	2024-05-15 16:56:49.000000000 +0200
> @@ -160,6 +160,7 @@ int dm_table_create(struct dm_table **re
>   	t->type = DM_TYPE_NONE;
>   	t->mode = mode;
>   	t->md = md;
> +	t->flush_pass_around = 1;
>   	*result = t;
>   	return 0;
>   }
> @@ -738,6 +739,9 @@ int dm_table_add_target(struct dm_table
>   	if (ti->limit_swap_bios && !static_key_enabled(&swap_bios_enabled.key))
>   		static_branch_enable(&swap_bios_enabled);
>   
> +	if (!ti->flush_pass_around)
> +		t->flush_pass_around = false;
> +
>   	return 0;
>   
>    bad:
> Index: linux-2.6/include/linux/device-mapper.h
> ===================================================================
> --- linux-2.6.orig/include/linux/device-mapper.h	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/include/linux/device-mapper.h	2024-05-15 16:56:49.000000000 +0200
> @@ -397,6 +397,11 @@ struct dm_target {
>   	 * bio_set_dev(). NOTE: ideally a target should _not_ need this.
>   	 */
>   	bool needs_bio_set_dev:1;
> +
> +	/*
> +	 * Set if the target supports flush optimization
> +	 */
> +	bool flush_pass_around:1;
>   };
>   
>   void *dm_per_bio_data(struct bio *bio, size_t data_size);
> Index: linux-2.6/drivers/md/dm.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm.c	2024-05-15 16:56:49.000000000 +0200
> +++ linux-2.6/drivers/md/dm.c	2024-05-16 20:06:32.000000000 +0200
> @@ -645,7 +645,7 @@ static struct bio *alloc_tio(struct clon
>   
>   	/* Set default bdev, but target must bio_set_dev() before issuing IO */
>   	clone->bi_bdev = md->disk->part0;
> -	if (unlikely(ti->needs_bio_set_dev))
> +	if (likely(ti != NULL) && unlikely(ti->needs_bio_set_dev))
>   		bio_set_dev(clone, md->disk->part0);
>   
>   	if (len) {
> @@ -1107,7 +1107,7 @@ static void clone_endio(struct bio *bio)
>   	blk_status_t error = bio->bi_status;
>   	struct dm_target_io *tio = clone_to_tio(bio);
>   	struct dm_target *ti = tio->ti;
> -	dm_endio_fn endio = ti->type->end_io;
> +	dm_endio_fn endio = likely(ti != NULL) ? ti->type->end_io : NULL;
>   	struct dm_io *io = tio->io;
>   	struct mapped_device *md = io->md;
>   
> @@ -1154,7 +1154,7 @@ static void clone_endio(struct bio *bio)
>   	}
>   
>   	if (static_branch_unlikely(&swap_bios_enabled) &&
> -	    unlikely(swap_bios_limit(ti, bio)))
> +	    likely(ti != NULL) && unlikely(swap_bios_limit(ti, bio)))
>   		up(&md->swap_bios_semaphore);
>   
>   	free_tio(bio);
> @@ -1566,17 +1566,36 @@ static void __send_empty_flush(struct cl
>   	ci->sector_count = 0;
>   	ci->io->tio.clone.bi_iter.bi_size = 0;
>   
> -	for (unsigned int i = 0; i < t->num_targets; i++) {
> -		unsigned int bios;
> -		struct dm_target *ti = dm_table_get_target(t, i);
> -
> -		if (unlikely(ti->num_flush_bios == 0))
> -			continue;
> -
> -		atomic_add(ti->num_flush_bios, &ci->io->io_count);
> -		bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
> -					     NULL, GFP_NOWAIT);
> -		atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
> +	if (!t->flush_pass_around) {
> +		for (unsigned int i = 0; i < t->num_targets; i++) {
> +			unsigned int bios;
> +			struct dm_target *ti = dm_table_get_target(t, i);
> +
> +			if (unlikely(ti->num_flush_bios == 0))
> +				continue;
> +
> +			atomic_add(ti->num_flush_bios, &ci->io->io_count);
> +			bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
> +						     NULL, GFP_NOWAIT);
> +			atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
> +		}
> +	} else {
> +		/*
> +		 * Note that there's no need to grab t->devices_lock here
> +		 * because the targets that support flush pass-around don't
> +		 * modify the list of devices.
> +		 */
> +		struct list_head *devices = dm_table_get_devices(t);
> +		unsigned int len = 0;
> +		struct dm_dev_internal *dd;
> +		list_for_each_entry(dd, devices, list) {
> +			struct bio *clone;
> +			clone = alloc_tio(ci, NULL, 0, &len, GFP_NOIO);
> +			atomic_add(1, &ci->io->io_count);
> +			bio_set_dev(clone, dd->dm_dev->bdev);
> +			clone->bi_end_io = clone_endio;
> +			dm_submit_bio_remap(clone, NULL);
> +		}
>   	}
>   
>   	/*
> 

Thanks, I tested this patch, and it fixed the issue for me!