block: add llseek(SEEK_HOLE/SEEK_DATA) support

[RFC 3/9] selftests: block_seek_hole: add loop block driver tests

Posted by Stefan Hajnoczi 1 year, 10 months ago

Run the tests with:

  $ make TARGETS=block_seek_hole -C tools/selftests run_tests

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tools/testing/selftests/Makefile              |   1 +
 .../selftests/block_seek_hole/Makefile        |  17 +++
 .../testing/selftests/block_seek_hole/config  |   1 +
 .../selftests/block_seek_hole/map_holes.py    |  37 +++++++
 .../testing/selftests/block_seek_hole/test.py | 103 ++++++++++++++++++
 5 files changed, 159 insertions(+)
 create mode 100644 tools/testing/selftests/block_seek_hole/Makefile
 create mode 100644 tools/testing/selftests/block_seek_hole/config
 create mode 100755 tools/testing/selftests/block_seek_hole/map_holes.py
 create mode 100755 tools/testing/selftests/block_seek_hole/test.py

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index e1504833654db..8a21d6031b940 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -2,6 +2,7 @@
 TARGETS += alsa
 TARGETS += amd-pstate
 TARGETS += arm64
+TARGETS += block_seek_hole
 TARGETS += bpf
 TARGETS += breakpoints
 TARGETS += cachestat
diff --git a/tools/testing/selftests/block_seek_hole/Makefile b/tools/testing/selftests/block_seek_hole/Makefile
new file mode 100644
index 0000000000000..3f4bbd52db29f
--- /dev/null
+++ b/tools/testing/selftests/block_seek_hole/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only
+PY3 = $(shell which python3 2>/dev/null)
+
+ifneq ($(PY3),)
+
+TEST_PROGS := test.py
+
+include ../lib.mk
+
+else
+
+all: no_py3_warning
+
+no_py3_warning:
+	@echo "Missing python3. This test will be skipped."
+
+endif
diff --git a/tools/testing/selftests/block_seek_hole/config b/tools/testing/selftests/block_seek_hole/config
new file mode 100644
index 0000000000000..72437e0c0fc1c
--- /dev/null
+++ b/tools/testing/selftests/block_seek_hole/config
@@ -0,0 +1 @@
+CONFIG_BLK_DEV_LOOP=m
diff --git a/tools/testing/selftests/block_seek_hole/map_holes.py b/tools/testing/selftests/block_seek_hole/map_holes.py
new file mode 100755
index 0000000000000..9477ec5d69d3a
--- /dev/null
+++ b/tools/testing/selftests/block_seek_hole/map_holes.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# map_holes.py <filename>
+#
+# Print the holes and data ranges in a file.
+
+import errno
+import os
+import sys
+
+def map_holes(fd):
+    end = os.lseek(fd, 0, os.SEEK_END)
+    offset = 0
+
+    print('TYPE START END SIZE')
+
+    while offset < end:
+        contents = 'DATA'
+        new_offset = os.lseek(fd, offset, os.SEEK_HOLE)
+        if new_offset == offset:
+            contents = 'HOLE'
+            try:
+              new_offset = os.lseek(fd, offset, os.SEEK_DATA)
+            except OSError as err:
+                if err.errno == errno.ENXIO:
+                    new_offset = end
+                else:
+                    raise err
+            assert new_offset != offset
+        print(f'{contents} {offset} {new_offset} {new_offset - offset}')
+        offset = new_offset
+
+if __name__ == '__main__':
+    with open(sys.argv[1], 'rb') as f:
+        fd = f.fileno()
+        map_holes(fd)
diff --git a/tools/testing/selftests/block_seek_hole/test.py b/tools/testing/selftests/block_seek_hole/test.py
new file mode 100755
index 0000000000000..4f7c2d01ab3d3
--- /dev/null
+++ b/tools/testing/selftests/block_seek_hole/test.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# test.py
+#
+# Test SEEK_HOLE/SEEK_DATA support in block drivers
+
+import os
+import subprocess
+import sys
+from contextlib import contextmanager
+
+KB = 1024
+MB = 1024 * KB
+
+def run(args):
+    try:
+        cmd = subprocess.run(args, check=True, capture_output=True)
+    except subprocess.CalledProcessError as e:
+        print(e)
+        print(e.stderr.decode('utf-8').strip())
+        sys.exit(1)
+    return cmd
+
+@contextmanager
+def test_file(layout_fn, prefix='test'):
+    '''A context manager that creates a test file and produces its path'''
+    path = f'{prefix}-{os.getpid()}'
+    with open(path, 'w+b') as f:
+        layout_fn(f)
+
+    try:
+        yield path
+    finally:
+        os.unlink(path)
+
+@contextmanager
+def loop_device(file_path):
+    '''A context manager that attaches a loop device for a given file and produces the path of the loop device'''
+    cmd = run(['losetup', '--show', '-f', file_path])
+    loop_path = os.fsdecode(cmd.stdout.strip())
+
+    try:
+        yield loop_path
+    finally:
+        run(['losetup', '-d', loop_path])
+
+def test(layout, dev_context_manager):
+    with test_file(layout) as file_path, dev_context_manager(file_path) as dev_path:
+        cmd = run(['./map_holes.py', file_path])
+        file_output = cmd.stdout.decode('utf-8').strip()
+
+        cmd = run(['./map_holes.py', dev_path])
+        dev_output = cmd.stdout.decode('utf-8').strip()
+
+        if file_output != dev_output:
+            print(f'FAIL {dev_context_manager.__name__} {layout.__name__}')
+            print('File output:')
+            print(file_output)
+            print('Does not match device output:')
+            print(dev_output)
+            sys.exit(1)
+
+def test_all(layouts, dev_context_managers):
+    for dev_context_manager in dev_context_managers:
+        for layout in layouts:
+            test(layout, dev_context_manager)
+
+# Different data layouts to test
+
+def data_at_beginning_and_end(f):
+    f.write(b'A' * 4 * KB)
+    f.seek(256 * MB)
+
+    f.write(b'B' * 64 * KB)
+
+    f.seek(1024 * MB - KB)
+    f.write(b'C' * KB)
+
+def holes_at_beginning_and_end(f):
+    f.seek(128 * MB)
+    f.write(b'A' * 4 * KB)
+
+    f.seek(512 * MB)
+    f.write(b'B' * 64 * KB)
+
+    f.truncate(1024 * MB)
+
+def no_holes(f):
+    # Just 1 MB so test file generation is quick
+    mb = b'A' * MB
+    f.write(mb)
+
+def empty_file(f):
+    f.truncate(1024 * MB)
+
+if __name__ == '__main__':
+    layouts = [data_at_beginning_and_end,
+               holes_at_beginning_and_end,
+               no_holes,
+               empty_file]
+    dev_context_managers = [loop_device]
+    test_all(layouts, dev_context_managers)
-- 
2.44.0

Re: [RFC 3/9] selftests: block_seek_hole: add loop block driver tests

Posted by Eric Blake 1 year, 10 months ago

On Thu, Mar 28, 2024 at 04:39:04PM -0400, Stefan Hajnoczi wrote:
> Run the tests with:
> 
>   $ make TARGETS=block_seek_hole -C tools/selftests run_tests
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  tools/testing/selftests/Makefile              |   1 +
>  .../selftests/block_seek_hole/Makefile        |  17 +++
>  .../testing/selftests/block_seek_hole/config  |   1 +
>  .../selftests/block_seek_hole/map_holes.py    |  37 +++++++
>  .../testing/selftests/block_seek_hole/test.py | 103 ++++++++++++++++++
>  5 files changed, 159 insertions(+)
>  create mode 100644 tools/testing/selftests/block_seek_hole/Makefile
>  create mode 100644 tools/testing/selftests/block_seek_hole/config
>  create mode 100755 tools/testing/selftests/block_seek_hole/map_holes.py
>  create mode 100755 tools/testing/selftests/block_seek_hole/test.py
> 

> +
> +def map_holes(fd):
> +    end = os.lseek(fd, 0, os.SEEK_END)
> +    offset = 0
> +
> +    print('TYPE START END SIZE')
> +
> +    while offset < end:
> +        contents = 'DATA'
> +        new_offset = os.lseek(fd, offset, os.SEEK_HOLE)
> +        if new_offset == offset:
> +            contents = 'HOLE'
> +            try:
> +              new_offset = os.lseek(fd, offset, os.SEEK_DATA)
> +            except OSError as err:
> +                if err.errno == errno.ENXIO:
> +                    new_offset = end
> +                else:
> +                    raise err
> +            assert new_offset != offset
> +        print(f'{contents} {offset} {new_offset} {new_offset - offset}')
> +        offset = new_offset

Over the years, I've seen various SEEK_HOLE implementation bugs where
things work great on the initial boundary, but fail when requested on
an offset not aligned to the start of the extent boundary.  It would
probably be worth enhancing the test to prove that:

if lseek(fd, offset, SEEK_HOLE) == offset:
  new_offset = lseek(fd, offset, SEEK_DATA)
  assert new_offset > offset
  assert lseek(fd, new_offset - 1, SEEK_HOLE) == new_offset - 1
else:
  assert lseek(fd, offset, SEEK_DATA) == offset
  new_offset = lseek(fd, offset, SEEK_HOLE)
  assert new_offset > offset
  assert lseek(fd, new_offset - 1, SEEK_DATA) == new_offset - 1

Among other things, this would prove that even though block devices
generally operate on a minimum granularity of a sector, lseek() still
gives byte-accurate results for a random offset that falls in the
middle of a sector, and doesn't accidentally round down reporting an
offset less than the value passed in to the request.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org

Re: [RFC 3/9] selftests: block_seek_hole: add loop block driver tests

Posted by Stefan Hajnoczi 1 year, 10 months ago

On Fri, Mar 29, 2024 at 07:38:17AM -0500, Eric Blake wrote:
> On Thu, Mar 28, 2024 at 04:39:04PM -0400, Stefan Hajnoczi wrote:
> > Run the tests with:
> > 
> >   $ make TARGETS=block_seek_hole -C tools/selftests run_tests
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  tools/testing/selftests/Makefile              |   1 +
> >  .../selftests/block_seek_hole/Makefile        |  17 +++
> >  .../testing/selftests/block_seek_hole/config  |   1 +
> >  .../selftests/block_seek_hole/map_holes.py    |  37 +++++++
> >  .../testing/selftests/block_seek_hole/test.py | 103 ++++++++++++++++++
> >  5 files changed, 159 insertions(+)
> >  create mode 100644 tools/testing/selftests/block_seek_hole/Makefile
> >  create mode 100644 tools/testing/selftests/block_seek_hole/config
> >  create mode 100755 tools/testing/selftests/block_seek_hole/map_holes.py
> >  create mode 100755 tools/testing/selftests/block_seek_hole/test.py
> > 
> 
> > +
> > +def map_holes(fd):
> > +    end = os.lseek(fd, 0, os.SEEK_END)
> > +    offset = 0
> > +
> > +    print('TYPE START END SIZE')
> > +
> > +    while offset < end:
> > +        contents = 'DATA'
> > +        new_offset = os.lseek(fd, offset, os.SEEK_HOLE)
> > +        if new_offset == offset:
> > +            contents = 'HOLE'
> > +            try:
> > +              new_offset = os.lseek(fd, offset, os.SEEK_DATA)
> > +            except OSError as err:
> > +                if err.errno == errno.ENXIO:
> > +                    new_offset = end
> > +                else:
> > +                    raise err
> > +            assert new_offset != offset
> > +        print(f'{contents} {offset} {new_offset} {new_offset - offset}')
> > +        offset = new_offset
> 
> Over the years, I've seen various SEEK_HOLE implementation bugs where
> things work great on the initial boundary, but fail when requested on
> an offset not aligned to the start of the extent boundary.  It would
> probably be worth enhancing the test to prove that:
> 
> if lseek(fd, offset, SEEK_HOLE) == offset:
>   new_offset = lseek(fd, offset, SEEK_DATA)
>   assert new_offset > offset
>   assert lseek(fd, new_offset - 1, SEEK_HOLE) == new_offset - 1
> else:
>   assert lseek(fd, offset, SEEK_DATA) == offset
>   new_offset = lseek(fd, offset, SEEK_HOLE)
>   assert new_offset > offset
>   assert lseek(fd, new_offset - 1, SEEK_DATA) == new_offset - 1
> 
> Among other things, this would prove that even though block devices
> generally operate on a minimum granularity of a sector, lseek() still
> gives byte-accurate results for a random offset that falls in the
> middle of a sector, and doesn't accidentally round down reporting an
> offset less than the value passed in to the request.

Sure. I'll add a test for that.

Stefan

Re: [RFC 3/9] selftests: block_seek_hole: add loop block driver tests

Posted by Eric Blake 1 year, 10 months ago

On Thu, Mar 28, 2024 at 04:39:04PM -0400, Stefan Hajnoczi wrote:
> Run the tests with:
> 
>   $ make TARGETS=block_seek_hole -C tools/selftests run_tests
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  tools/testing/selftests/Makefile              |   1 +
>  .../selftests/block_seek_hole/Makefile        |  17 +++
>  .../testing/selftests/block_seek_hole/config  |   1 +
>  .../selftests/block_seek_hole/map_holes.py    |  37 +++++++
>  .../testing/selftests/block_seek_hole/test.py | 103 ++++++++++++++++++
>  5 files changed, 159 insertions(+)
>  create mode 100644 tools/testing/selftests/block_seek_hole/Makefile
>  create mode 100644 tools/testing/selftests/block_seek_hole/config
>  create mode 100755 tools/testing/selftests/block_seek_hole/map_holes.py
>  create mode 100755 tools/testing/selftests/block_seek_hole/test.py
> 
> +++ b/tools/testing/selftests/block_seek_hole/test.py

> +
> +# Different data layouts to test
> +
> +def data_at_beginning_and_end(f):
> +    f.write(b'A' * 4 * KB)
> +    f.seek(256 * MB)
> +
> +    f.write(b'B' * 64 * KB)
> +
> +    f.seek(1024 * MB - KB)
> +    f.write(b'C' * KB)
> +
> +def holes_at_beginning_and_end(f):
> +    f.seek(128 * MB)
> +    f.write(b'A' * 4 * KB)
> +
> +    f.seek(512 * MB)
> +    f.write(b'B' * 64 * KB)
> +
> +    f.truncate(1024 * MB)
> +
> +def no_holes(f):
> +    # Just 1 MB so test file generation is quick
> +    mb = b'A' * MB
> +    f.write(mb)
> +
> +def empty_file(f):
> +    f.truncate(1024 * MB)

Is it also worth attempting to test a (necessarily sparse!) file
larger than 2GiB to prove that we are 64-bit clean, even on a 32-bit
system where lseek is different than lseek64?  (I honestly have no
idea if python always uses 64-bit seek even on 32-bit systems,
although I would be surprised if it were not)

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org

Re: [RFC 3/9] selftests: block_seek_hole: add loop block driver tests

Posted by Stefan Hajnoczi 1 year, 10 months ago

On Thu, Mar 28, 2024 at 07:11:30PM -0500, Eric Blake wrote:
> On Thu, Mar 28, 2024 at 04:39:04PM -0400, Stefan Hajnoczi wrote:
> > Run the tests with:
> > 
> >   $ make TARGETS=block_seek_hole -C tools/selftests run_tests
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  tools/testing/selftests/Makefile              |   1 +
> >  .../selftests/block_seek_hole/Makefile        |  17 +++
> >  .../testing/selftests/block_seek_hole/config  |   1 +
> >  .../selftests/block_seek_hole/map_holes.py    |  37 +++++++
> >  .../testing/selftests/block_seek_hole/test.py | 103 ++++++++++++++++++
> >  5 files changed, 159 insertions(+)
> >  create mode 100644 tools/testing/selftests/block_seek_hole/Makefile
> >  create mode 100644 tools/testing/selftests/block_seek_hole/config
> >  create mode 100755 tools/testing/selftests/block_seek_hole/map_holes.py
> >  create mode 100755 tools/testing/selftests/block_seek_hole/test.py
> > 
> > +++ b/tools/testing/selftests/block_seek_hole/test.py
> 
> > +
> > +# Different data layouts to test
> > +
> > +def data_at_beginning_and_end(f):
> > +    f.write(b'A' * 4 * KB)
> > +    f.seek(256 * MB)
> > +
> > +    f.write(b'B' * 64 * KB)
> > +
> > +    f.seek(1024 * MB - KB)
> > +    f.write(b'C' * KB)
> > +
> > +def holes_at_beginning_and_end(f):
> > +    f.seek(128 * MB)
> > +    f.write(b'A' * 4 * KB)
> > +
> > +    f.seek(512 * MB)
> > +    f.write(b'B' * 64 * KB)
> > +
> > +    f.truncate(1024 * MB)
> > +
> > +def no_holes(f):
> > +    # Just 1 MB so test file generation is quick
> > +    mb = b'A' * MB
> > +    f.write(mb)
> > +
> > +def empty_file(f):
> > +    f.truncate(1024 * MB)
> 
> Is it also worth attempting to test a (necessarily sparse!) file
> larger than 2GiB to prove that we are 64-bit clean, even on a 32-bit
> system where lseek is different than lseek64?  (I honestly have no
> idea if python always uses 64-bit seek even on 32-bit systems,
> although I would be surprised if it were not)

Yes, it wouldn't hurt to add a test case for that. I'll do that.

Stefan

[RFC 1/9] block: add llseek(SEEK_HOLE/SEEK_DATA) support
[RFC 2/9] loop: add llseek(SEEK_HOLE/SEEK_DATA) support
[RFC 3/9] selftests: block_seek_hole: add loop block driver tests
[RFC 4/9] dm: add llseek(SEEK_HOLE/SEEK_DATA) support
[RFC 5/9] selftests: block_seek_hole: add dm-zero test
[RFC 6/9] dm-linear: add llseek(SEEK_HOLE/SEEK_DATA) support
[RFC 7/9] selftests: block_seek_hole: add dm-linear test
[RFC 8/9] dm thin: add llseek(SEEK_HOLE/SEEK_DATA) support
[RFC 9/9] selftests: block_seek_hole: add dm-thin test