Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
Open issues:
- Locking?
- thin_seek_hole_data() does not run as a bio or request. This patch
assumes dm_thin_find_mapped_range() synchronously performs I/O if
metadata needs to be loaded from disk. Is that a valid assumption?
---
drivers/md/dm-thin.c | 77 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 77 insertions(+)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 4793ad2aa1f7e..3c5dc4f0fe8a3 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4501,6 +4501,82 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
}
}
+static dm_block_t loff_to_block(struct pool *pool, loff_t offset)
+{
+ sector_t offset_sectors = offset >> SECTOR_SHIFT;
+ dm_block_t ret;
+
+ if (block_size_is_power_of_two(pool))
+ ret = offset_sectors >> pool->sectors_per_block_shift;
+ else {
+ ret = offset_sectors;
+ (void) sector_div(ret, pool->sectors_per_block);
+ }
+
+ return ret;
+}
+
+static loff_t block_to_loff(struct pool *pool, dm_block_t block)
+{
+ return block_to_sectors(pool, block) << SECTOR_SHIFT;
+}
+
+static loff_t thin_seek_hole_data(struct dm_target *ti, loff_t offset,
+ int whence)
+{
+ struct thin_c *tc = ti->private;
+ struct dm_thin_device *td = tc->td;
+ struct pool *pool = tc->pool;
+ dm_block_t begin;
+ dm_block_t end;
+ dm_block_t mapped_begin;
+ dm_block_t mapped_end;
+ dm_block_t pool_begin;
+ bool maybe_shared;
+ int ret;
+
+ /* TODO locking? */
+
+ if (block_size_is_power_of_two(pool))
+ end = ti->len >> pool->sectors_per_block_shift;
+ else {
+ end = ti->len;
+ (void) sector_div(end, pool->sectors_per_block);
+ }
+
+ offset -= ti->begin << SECTOR_SHIFT;
+
+ while (true) {
+ begin = loff_to_block(pool, offset);
+ ret = dm_thin_find_mapped_range(td, begin, end,
+ &mapped_begin, &mapped_end,
+ &pool_begin, &maybe_shared);
+ if (ret == -ENODATA) {
+ if (whence == SEEK_DATA)
+ return -ENXIO;
+ break;
+ } else if (ret < 0) {
+ /* TODO handle EWOULDBLOCK? */
+ return -ENXIO;
+ }
+
+ /* SEEK_DATA finishes here... */
+ if (whence == SEEK_DATA) {
+ if (mapped_begin != begin)
+ offset = block_to_loff(pool, mapped_begin);
+ break;
+ }
+
+ /* ...while SEEK_HOLE may need to look further */
+ if (mapped_begin != begin)
+ break; /* offset is in a hole */
+
+ offset = block_to_loff(pool, mapped_end);
+ }
+
+ return offset + (ti->begin << SECTOR_SHIFT);
+}
+
static struct target_type thin_target = {
.name = "thin",
.version = {1, 23, 0},
@@ -4515,6 +4591,7 @@ static struct target_type thin_target = {
.status = thin_status,
.iterate_devices = thin_iterate_devices,
.io_hints = thin_io_hints,
+ .seek_hole_data = thin_seek_hole_data,
};
/*----------------------------------------------------------------*/
--
2.44.0
On Thu, Mar 28, 2024 at 04:39:09PM -0400, Stefan Hajnoczi wrote:
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> Open issues:
> - Locking?
> - thin_seek_hole_data() does not run as a bio or request. This patch
> assumes dm_thin_find_mapped_range() synchronously performs I/O if
> metadata needs to be loaded from disk. Is that a valid assumption?
> ---
> drivers/md/dm-thin.c | 77 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 77 insertions(+)
>
> diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> index 4793ad2aa1f7e..3c5dc4f0fe8a3 100644
> --- a/drivers/md/dm-thin.c
> +++ b/drivers/md/dm-thin.c
> @@ -4501,6 +4501,82 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
> }
> }
>
> +static dm_block_t loff_to_block(struct pool *pool, loff_t offset)
> +{
> + sector_t offset_sectors = offset >> SECTOR_SHIFT;
> + dm_block_t ret;
> +
> + if (block_size_is_power_of_two(pool))
> + ret = offset_sectors >> pool->sectors_per_block_shift;
> + else {
> + ret = offset_sectors;
> + (void) sector_div(ret, pool->sectors_per_block);
> + }
> +
> + return ret;
> +}
> +
> +static loff_t block_to_loff(struct pool *pool, dm_block_t block)
> +{
> + return block_to_sectors(pool, block) << SECTOR_SHIFT;
> +}
> +
> +static loff_t thin_seek_hole_data(struct dm_target *ti, loff_t offset,
> + int whence)
> +{
> + struct thin_c *tc = ti->private;
> + struct dm_thin_device *td = tc->td;
> + struct pool *pool = tc->pool;
> + dm_block_t begin;
> + dm_block_t end;
> + dm_block_t mapped_begin;
> + dm_block_t mapped_end;
> + dm_block_t pool_begin;
> + bool maybe_shared;
> + int ret;
> +
> + /* TODO locking? */
> +
> + if (block_size_is_power_of_two(pool))
> + end = ti->len >> pool->sectors_per_block_shift;
> + else {
> + end = ti->len;
> + (void) sector_div(end, pool->sectors_per_block);
> + }
> +
> + offset -= ti->begin << SECTOR_SHIFT;
> +
> + while (true) {
> + begin = loff_to_block(pool, offset);
> + ret = dm_thin_find_mapped_range(td, begin, end,
> + &mapped_begin, &mapped_end,
> + &pool_begin, &maybe_shared);
> + if (ret == -ENODATA) {
> + if (whence == SEEK_DATA)
> + return -ENXIO;
> + break;
> + } else if (ret < 0) {
> + /* TODO handle EWOULDBLOCK? */
> + return -ENXIO;
This should probably be -EIO, not -ENXIO.
> + }
> +
> + /* SEEK_DATA finishes here... */
> + if (whence == SEEK_DATA) {
> + if (mapped_begin != begin)
> + offset = block_to_loff(pool, mapped_begin);
> + break;
> + }
> +
> + /* ...while SEEK_HOLE may need to look further */
> + if (mapped_begin != begin)
> + break; /* offset is in a hole */
> +
> + offset = block_to_loff(pool, mapped_end);
> + }
> +
> + return offset + (ti->begin << SECTOR_SHIFT);
It's hard to follow, but I'm fairly certain that if whence ==
SEEK_HOLE, you end up returning ti->begin + ti->len instead of -ENXIO
if the range from begin to end is fully mapped; which is inconsistent
with the semantics you have in 4/9 (although in 6/9 I argue that
having all of the dm callbacks return ti->begin + ti->len instead of
-ENXIO might make logic easier for iterating through consecutive ti,
and then convert to -ENXIO only in the caller).
> +}
> +
> static struct target_type thin_target = {
> .name = "thin",
> .version = {1, 23, 0},
> @@ -4515,6 +4591,7 @@ static struct target_type thin_target = {
> .status = thin_status,
> .iterate_devices = thin_iterate_devices,
> .io_hints = thin_io_hints,
> + .seek_hole_data = thin_seek_hole_data,
> };
>
> /*----------------------------------------------------------------*/
> --
> 2.44.0
>
--
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization: qemu.org | libguestfs.org
On Thu, Mar 28, 2024 at 08:31:21PM -0500, Eric Blake wrote:
> On Thu, Mar 28, 2024 at 04:39:09PM -0400, Stefan Hajnoczi wrote:
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > Open issues:
> > - Locking?
> > - thin_seek_hole_data() does not run as a bio or request. This patch
> > assumes dm_thin_find_mapped_range() synchronously performs I/O if
> > metadata needs to be loaded from disk. Is that a valid assumption?
> > ---
> > drivers/md/dm-thin.c | 77 ++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 77 insertions(+)
> >
> > diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> > index 4793ad2aa1f7e..3c5dc4f0fe8a3 100644
> > --- a/drivers/md/dm-thin.c
> > +++ b/drivers/md/dm-thin.c
> > @@ -4501,6 +4501,82 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
> > }
> > }
> >
> > +static dm_block_t loff_to_block(struct pool *pool, loff_t offset)
> > +{
> > + sector_t offset_sectors = offset >> SECTOR_SHIFT;
> > + dm_block_t ret;
> > +
> > + if (block_size_is_power_of_two(pool))
> > + ret = offset_sectors >> pool->sectors_per_block_shift;
> > + else {
> > + ret = offset_sectors;
> > + (void) sector_div(ret, pool->sectors_per_block);
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +static loff_t block_to_loff(struct pool *pool, dm_block_t block)
> > +{
> > + return block_to_sectors(pool, block) << SECTOR_SHIFT;
> > +}
> > +
> > +static loff_t thin_seek_hole_data(struct dm_target *ti, loff_t offset,
> > + int whence)
> > +{
> > + struct thin_c *tc = ti->private;
> > + struct dm_thin_device *td = tc->td;
> > + struct pool *pool = tc->pool;
> > + dm_block_t begin;
> > + dm_block_t end;
> > + dm_block_t mapped_begin;
> > + dm_block_t mapped_end;
> > + dm_block_t pool_begin;
> > + bool maybe_shared;
> > + int ret;
> > +
> > + /* TODO locking? */
> > +
> > + if (block_size_is_power_of_two(pool))
> > + end = ti->len >> pool->sectors_per_block_shift;
> > + else {
> > + end = ti->len;
> > + (void) sector_div(end, pool->sectors_per_block);
> > + }
> > +
> > + offset -= ti->begin << SECTOR_SHIFT;
> > +
> > + while (true) {
> > + begin = loff_to_block(pool, offset);
> > + ret = dm_thin_find_mapped_range(td, begin, end,
> > + &mapped_begin, &mapped_end,
> > + &pool_begin, &maybe_shared);
> > + if (ret == -ENODATA) {
> > + if (whence == SEEK_DATA)
> > + return -ENXIO;
> > + break;
> > + } else if (ret < 0) {
> > + /* TODO handle EWOULDBLOCK? */
> > + return -ENXIO;
>
> This should probably be -EIO, not -ENXIO.
Yes. XFS also returns -EIO, so I guess it's okay to do so.
I still need to get to the bottom of whether calling
dm_thin_find_mapped_range() is sane here and what to do when/if it
returns EWOULDBLOCK.
> > + }
> > +
> > + /* SEEK_DATA finishes here... */
> > + if (whence == SEEK_DATA) {
> > + if (mapped_begin != begin)
> > + offset = block_to_loff(pool, mapped_begin);
> > + break;
> > + }
> > +
> > + /* ...while SEEK_HOLE may need to look further */
> > + if (mapped_begin != begin)
> > + break; /* offset is in a hole */
> > +
> > + offset = block_to_loff(pool, mapped_end);
> > + }
> > +
> > + return offset + (ti->begin << SECTOR_SHIFT);
>
> It's hard to follow, but I'm fairly certain that if whence ==
> SEEK_HOLE, you end up returning ti->begin + ti->len instead of -ENXIO
> if the range from begin to end is fully mapped; which is inconsistent
> with the semantics you have in 4/9 (although in 6/9 I argue that
> having all of the dm callbacks return ti->begin + ti->len instead of
> -ENXIO might make logic easier for iterating through consecutive ti,
> and then convert to -ENXIO only in the caller).
Returning (ti->begin + ti->len) << SECTOR_SHIFT for SEEK_HOLE when there
is data at the end of the target is intentional. This matches the
semantics of lseek().
I agree there is adjustment necessary in dm.c, but I want to seek the
semantics of all lseek() functions identical to avoid confusion.
Stefan
© 2016 - 2025 Red Hat, Inc.