1 | The following changes since commit 711c0418c8c1ce3a24346f058b001c4c5a2f0f81: | 1 | The following changes since commit 1be5a765c08cee3a9587c8a8d3fc2ea247b13f9c: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/philmd/tags/mips-20210702' into staging (2021-07-04 14:04:12 +0100) | 3 | Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging (2022-04-19 18:22:16 -0700) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/hreitz/qemu.git tags/pull-block-2022-04-20 |
8 | 8 | ||
9 | for you to fetch changes up to 9f460c64e13897117f35ffb61f6f5e0102cabc70: | 9 | for you to fetch changes up to 0423f75351ab83b844a31349218b0eadd830e07a: |
10 | 10 | ||
11 | block/io: Merge discard request alignments (2021-07-06 14:28:55 +0100) | 11 | qcow2: Add errp to rebuild_refcount_structure() (2022-04-20 12:09:17 +0200) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Pull request | 14 | Block patches: |
15 | - Some changes for qcow2's refcount repair algorithm to make it work for | ||
16 | qcow2 images stored on block devices | ||
17 | - Skip test cases that require zstd when support for it is missing | ||
18 | - Some refactoring in the iotests' meson.build | ||
15 | 19 | ||
16 | ---------------------------------------------------------------- | 20 | ---------------------------------------------------------------- |
21 | Hanna Reitz (6): | ||
22 | iotests.py: Add supports_qcow2_zstd_compression() | ||
23 | iotests/065: Check for zstd support | ||
24 | iotests/303: Check for zstd support | ||
25 | qcow2: Improve refcount structure rebuilding | ||
26 | iotests/108: Test new refcount rebuild algorithm | ||
27 | qcow2: Add errp to rebuild_refcount_structure() | ||
17 | 28 | ||
18 | Akihiko Odaki (3): | 29 | Thomas Huth (2): |
19 | block/file-posix: Optimize for macOS | 30 | tests/qemu-iotests/meson.build: Improve the indentation |
20 | block: Add backend_defaults property | 31 | tests/qemu-iotests: Move the bash and sanitizer checks to meson.build |
21 | block/io: Merge discard request alignments | ||
22 | 32 | ||
23 | Stefan Hajnoczi (2): | 33 | block/qcow2-refcount.c | 353 +++++++++++++++++++++++---------- |
24 | util/async: add a human-readable name to BHs for debugging | 34 | tests/check-block.sh | 26 --- |
25 | util/async: print leaked BH name when AioContext finalizes | 35 | tests/qemu-iotests/065 | 24 ++- |
26 | 36 | tests/qemu-iotests/108 | 259 +++++++++++++++++++++++- | |
27 | include/block/aio.h | 31 ++++++++++++++++++++++--- | 37 | tests/qemu-iotests/108.out | 81 ++++++++ |
28 | include/hw/block/block.h | 3 +++ | 38 | tests/qemu-iotests/303 | 4 +- |
29 | include/qemu/main-loop.h | 4 +++- | 39 | tests/qemu-iotests/iotests.py | 20 ++ |
30 | block/file-posix.c | 27 ++++++++++++++++++++-- | 40 | tests/qemu-iotests/meson.build | 73 ++++--- |
31 | block/io.c | 2 ++ | 41 | 8 files changed, 673 insertions(+), 167 deletions(-) |
32 | hw/block/block.c | 42 ++++++++++++++++++++++++++++++---- | ||
33 | tests/unit/ptimer-test-stubs.c | 2 +- | ||
34 | util/async.c | 25 ++++++++++++++++---- | ||
35 | util/main-loop.c | 4 ++-- | ||
36 | tests/qemu-iotests/172.out | 38 ++++++++++++++++++++++++++++++ | ||
37 | 10 files changed, 161 insertions(+), 17 deletions(-) | ||
38 | 42 | ||
39 | -- | 43 | -- |
40 | 2.31.1 | 44 | 2.35.1 |
41 | diff view generated by jsdifflib |
1 | BHs must be deleted before the AioContext is finalized. If not, it's a | 1 | From: Thomas Huth <thuth@redhat.com> |
---|---|---|---|
2 | bug and probably indicates that some part of the program still expects | ||
3 | the BH to run in the future. That can lead to memory leaks, inconsistent | ||
4 | state, or just hangs. | ||
5 | 2 | ||
6 | Unfortunately the assert(flags & BH_DELETED) call in aio_ctx_finalize() | 3 | By using subdir_done(), we can get rid of one level of indentation |
7 | is difficult to debug because the assertion failure contains no | 4 | in this file. This will make it easier to add more conditions to |
8 | information about the BH! | 5 | skip the iotests in future patches. |
9 | 6 | ||
10 | Use the QEMUBH name field added in the previous patch to show a useful | 7 | Reviewed-by: Hanna Reitz <hreitz@redhat.com> |
11 | error when a leaked BH is detected. | 8 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> |
9 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
10 | Message-Id: <20220223093840.2515281-3-thuth@redhat.com> | ||
11 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> | ||
12 | --- | ||
13 | tests/qemu-iotests/meson.build | 61 ++++++++++++++++++---------------- | ||
14 | 1 file changed, 32 insertions(+), 29 deletions(-) | ||
12 | 15 | ||
13 | Suggested-by: Eric Ernst <eric.g.ernst@gmail.com> | 16 | diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build |
14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 17 | index XXXXXXX..XXXXXXX 100644 |
15 | Message-Id: <20210414200247.917496-3-stefanha@redhat.com> | 18 | --- a/tests/qemu-iotests/meson.build |
16 | --- | 19 | +++ b/tests/qemu-iotests/meson.build |
17 | util/async.c | 16 ++++++++++++++-- | 20 | @@ -XXX,XX +XXX,XX @@ |
18 | 1 file changed, 14 insertions(+), 2 deletions(-) | 21 | -if have_tools and targetos != 'windows' and not get_option('gprof') |
22 | - qemu_iotests_binaries = [qemu_img, qemu_io, qemu_nbd, qsd] | ||
23 | - qemu_iotests_env = {'PYTHON': python.full_path()} | ||
24 | - qemu_iotests_formats = { | ||
25 | - 'qcow2': 'quick', | ||
26 | - 'raw': 'slow', | ||
27 | - 'qed': 'thorough', | ||
28 | - 'vmdk': 'thorough', | ||
29 | - 'vpc': 'thorough' | ||
30 | - } | ||
31 | - | ||
32 | - foreach k, v : emulators | ||
33 | - if k.startswith('qemu-system-') | ||
34 | - qemu_iotests_binaries += v | ||
35 | - endif | ||
36 | - endforeach | ||
37 | - foreach format, speed: qemu_iotests_formats | ||
38 | - if speed == 'quick' | ||
39 | - suites = 'block' | ||
40 | - else | ||
41 | - suites = ['block-' + speed, speed] | ||
42 | - endif | ||
43 | - test('qemu-iotests ' + format, sh, args: [files('../check-block.sh'), format], | ||
44 | - depends: qemu_iotests_binaries, env: qemu_iotests_env, | ||
45 | - protocol: 'tap', | ||
46 | - suite: suites, | ||
47 | - timeout: 0, | ||
48 | - is_parallel: false) | ||
49 | - endforeach | ||
50 | +if not have_tools or targetos == 'windows' or get_option('gprof') | ||
51 | + subdir_done() | ||
52 | endif | ||
53 | + | ||
54 | +qemu_iotests_binaries = [qemu_img, qemu_io, qemu_nbd, qsd] | ||
55 | +qemu_iotests_env = {'PYTHON': python.full_path()} | ||
56 | +qemu_iotests_formats = { | ||
57 | + 'qcow2': 'quick', | ||
58 | + 'raw': 'slow', | ||
59 | + 'qed': 'thorough', | ||
60 | + 'vmdk': 'thorough', | ||
61 | + 'vpc': 'thorough' | ||
62 | +} | ||
63 | + | ||
64 | +foreach k, v : emulators | ||
65 | + if k.startswith('qemu-system-') | ||
66 | + qemu_iotests_binaries += v | ||
67 | + endif | ||
68 | +endforeach | ||
69 | + | ||
70 | +foreach format, speed: qemu_iotests_formats | ||
71 | + if speed == 'quick' | ||
72 | + suites = 'block' | ||
73 | + else | ||
74 | + suites = ['block-' + speed, speed] | ||
75 | + endif | ||
76 | + test('qemu-iotests ' + format, sh, args: [files('../check-block.sh'), format], | ||
77 | + depends: qemu_iotests_binaries, env: qemu_iotests_env, | ||
78 | + protocol: 'tap', | ||
79 | + suite: suites, | ||
80 | + timeout: 0, | ||
81 | + is_parallel: false) | ||
82 | +endforeach | ||
83 | -- | ||
84 | 2.35.1 | ||
19 | 85 | ||
20 | diff --git a/util/async.c b/util/async.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/util/async.c | ||
23 | +++ b/util/async.c | ||
24 | @@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source) | ||
25 | assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list)); | ||
26 | |||
27 | while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) { | ||
28 | - /* qemu_bh_delete() must have been called on BHs in this AioContext */ | ||
29 | - assert(flags & BH_DELETED); | ||
30 | + /* | ||
31 | + * qemu_bh_delete() must have been called on BHs in this AioContext. In | ||
32 | + * many cases memory leaks, hangs, or inconsistent state occur when a | ||
33 | + * BH is leaked because something still expects it to run. | ||
34 | + * | ||
35 | + * If you hit this, fix the lifecycle of the BH so that | ||
36 | + * qemu_bh_delete() and any associated cleanup is called before the | ||
37 | + * AioContext is finalized. | ||
38 | + */ | ||
39 | + if (unlikely(!(flags & BH_DELETED))) { | ||
40 | + fprintf(stderr, "%s: BH '%s' leaked, aborting...\n", | ||
41 | + __func__, bh->name); | ||
42 | + abort(); | ||
43 | + } | ||
44 | |||
45 | g_free(bh); | ||
46 | } | ||
47 | -- | ||
48 | 2.31.1 | ||
49 | 86 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Thomas Huth <thuth@redhat.com> | ||
1 | 2 | ||
3 | We want to get rid of check-block.sh in the long run, so let's move | ||
4 | the checks for the bash version and sanitizers from check-block.sh | ||
5 | into the meson.build file instead. | ||
6 | |||
7 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
8 | Message-Id: <20220223093840.2515281-4-thuth@redhat.com> | ||
9 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> | ||
10 | --- | ||
11 | tests/check-block.sh | 26 -------------------------- | ||
12 | tests/qemu-iotests/meson.build | 14 ++++++++++++++ | ||
13 | 2 files changed, 14 insertions(+), 26 deletions(-) | ||
14 | |||
15 | diff --git a/tests/check-block.sh b/tests/check-block.sh | ||
16 | index XXXXXXX..XXXXXXX 100755 | ||
17 | --- a/tests/check-block.sh | ||
18 | +++ b/tests/check-block.sh | ||
19 | @@ -XXX,XX +XXX,XX @@ skip() { | ||
20 | exit 0 | ||
21 | } | ||
22 | |||
23 | -# Disable tests with any sanitizer except for specific ones | ||
24 | -SANITIZE_FLAGS=$( grep "CFLAGS.*-fsanitize" config-host.mak 2>/dev/null ) | ||
25 | -ALLOWED_SANITIZE_FLAGS="safe-stack cfi-icall" | ||
26 | -#Remove all occurrencies of allowed Sanitize flags | ||
27 | -for j in ${ALLOWED_SANITIZE_FLAGS}; do | ||
28 | - TMP_FLAGS=${SANITIZE_FLAGS} | ||
29 | - SANITIZE_FLAGS="" | ||
30 | - for i in ${TMP_FLAGS}; do | ||
31 | - if ! echo ${i} | grep -q "${j}" 2>/dev/null; then | ||
32 | - SANITIZE_FLAGS="${SANITIZE_FLAGS} ${i}" | ||
33 | - fi | ||
34 | - done | ||
35 | -done | ||
36 | -if echo ${SANITIZE_FLAGS} | grep -q "\-fsanitize" 2>/dev/null; then | ||
37 | - # Have a sanitize flag that is not allowed, stop | ||
38 | - skip "Sanitizers are enabled ==> Not running the qemu-iotests." | ||
39 | -fi | ||
40 | - | ||
41 | if [ -z "$(find . -name 'qemu-system-*' -print)" ]; then | ||
42 | skip "No qemu-system binary available ==> Not running the qemu-iotests." | ||
43 | fi | ||
44 | |||
45 | -if ! command -v bash >/dev/null 2>&1 ; then | ||
46 | - skip "bash not available ==> Not running the qemu-iotests." | ||
47 | -fi | ||
48 | - | ||
49 | -if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then | ||
50 | - skip "bash version too old ==> Not running the qemu-iotests." | ||
51 | -fi | ||
52 | - | ||
53 | cd tests/qemu-iotests | ||
54 | |||
55 | # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests | ||
56 | diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/tests/qemu-iotests/meson.build | ||
59 | +++ b/tests/qemu-iotests/meson.build | ||
60 | @@ -XXX,XX +XXX,XX @@ if not have_tools or targetos == 'windows' or get_option('gprof') | ||
61 | subdir_done() | ||
62 | endif | ||
63 | |||
64 | +foreach cflag: config_host['QEMU_CFLAGS'].split() | ||
65 | + if cflag.startswith('-fsanitize') and \ | ||
66 | + not cflag.contains('safe-stack') and not cflag.contains('cfi-icall') | ||
67 | + message('Sanitizers are enabled ==> Disabled the qemu-iotests.') | ||
68 | + subdir_done() | ||
69 | + endif | ||
70 | +endforeach | ||
71 | + | ||
72 | +bash = find_program('bash', required: false, version: '>= 4.0') | ||
73 | +if not bash.found() | ||
74 | + message('bash >= v4.0 not available ==> Disabled the qemu-iotests.') | ||
75 | + subdir_done() | ||
76 | +endif | ||
77 | + | ||
78 | qemu_iotests_binaries = [qemu_img, qemu_io, qemu_nbd, qsd] | ||
79 | qemu_iotests_env = {'PYTHON': python.full_path()} | ||
80 | qemu_iotests_formats = { | ||
81 | -- | ||
82 | 2.35.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> | ||
3 | Message-Id: <20220323105522.53660-2-hreitz@redhat.com> | ||
4 | Reviewed-by: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru> | ||
5 | --- | ||
6 | tests/qemu-iotests/iotests.py | 20 ++++++++++++++++++++ | ||
7 | 1 file changed, 20 insertions(+) | ||
1 | 8 | ||
9 | diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/tests/qemu-iotests/iotests.py | ||
12 | +++ b/tests/qemu-iotests/iotests.py | ||
13 | @@ -XXX,XX +XXX,XX @@ def verify_working_luks(): | ||
14 | if not working: | ||
15 | notrun(reason) | ||
16 | |||
17 | +def supports_qcow2_zstd_compression() -> bool: | ||
18 | + img_file = f'{test_dir}/qcow2-zstd-test.qcow2' | ||
19 | + res = qemu_img('create', '-f', 'qcow2', '-o', 'compression_type=zstd', | ||
20 | + img_file, '0', | ||
21 | + check=False) | ||
22 | + try: | ||
23 | + os.remove(img_file) | ||
24 | + except OSError: | ||
25 | + pass | ||
26 | + | ||
27 | + if res.returncode == 1 and \ | ||
28 | + "'compression-type' does not accept value 'zstd'" in res.stdout: | ||
29 | + return False | ||
30 | + else: | ||
31 | + return True | ||
32 | + | ||
33 | +def verify_qcow2_zstd_compression(): | ||
34 | + if not supports_qcow2_zstd_compression(): | ||
35 | + notrun('zstd compression not supported') | ||
36 | + | ||
37 | def qemu_pipe(*args: str) -> str: | ||
38 | """ | ||
39 | Run qemu with an option to print something and exit (e.g. a help option). | ||
40 | -- | ||
41 | 2.35.1 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | Some test cases run in iotest 065 want to run with zstd compression just | ||
2 | for added coverage. Run them with zlib if there is no zstd support | ||
3 | compiled in. | ||
1 | 4 | ||
5 | Reported-by: Thomas Huth <thuth@redhat.com> | ||
6 | Fixes: 12a936171d71f839dc907ff ("iotest 065: explicit compression type") | ||
7 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> | ||
8 | Message-Id: <20220323105522.53660-3-hreitz@redhat.com> | ||
9 | Reviewed-by: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru> | ||
10 | --- | ||
11 | tests/qemu-iotests/065 | 24 ++++++++++++++++++------ | ||
12 | 1 file changed, 18 insertions(+), 6 deletions(-) | ||
13 | |||
14 | diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065 | ||
15 | index XXXXXXX..XXXXXXX 100755 | ||
16 | --- a/tests/qemu-iotests/065 | ||
17 | +++ b/tests/qemu-iotests/065 | ||
18 | @@ -XXX,XX +XXX,XX @@ import os | ||
19 | import re | ||
20 | import json | ||
21 | import iotests | ||
22 | -from iotests import qemu_img, qemu_img_info | ||
23 | +from iotests import qemu_img, qemu_img_info, supports_qcow2_zstd_compression | ||
24 | import unittest | ||
25 | |||
26 | test_img = os.path.join(iotests.test_dir, 'test.img') | ||
27 | @@ -XXX,XX +XXX,XX @@ class TestQCow2(TestQemuImgInfo): | ||
28 | |||
29 | class TestQCow3NotLazy(TestQemuImgInfo): | ||
30 | '''Testing a qcow2 version 3 image with lazy refcounts disabled''' | ||
31 | - img_options = 'compat=1.1,lazy_refcounts=off,compression_type=zstd' | ||
32 | + if supports_qcow2_zstd_compression(): | ||
33 | + compression_type = 'zstd' | ||
34 | + else: | ||
35 | + compression_type = 'zlib' | ||
36 | + | ||
37 | + img_options = 'compat=1.1,lazy_refcounts=off' | ||
38 | + img_options += f',compression_type={compression_type}' | ||
39 | json_compare = { 'compat': '1.1', 'lazy-refcounts': False, | ||
40 | 'refcount-bits': 16, 'corrupt': False, | ||
41 | - 'compression-type': 'zstd', 'extended-l2': False } | ||
42 | - human_compare = [ 'compat: 1.1', 'compression type: zstd', | ||
43 | + 'compression-type': compression_type, 'extended-l2': False } | ||
44 | + human_compare = [ 'compat: 1.1', f'compression type: {compression_type}', | ||
45 | 'lazy refcounts: false', 'refcount bits: 16', | ||
46 | 'corrupt: false', 'extended l2: false' ] | ||
47 | |||
48 | @@ -XXX,XX +XXX,XX @@ class TestQCow3NotLazyQMP(TestQMP): | ||
49 | class TestQCow3LazyQMP(TestQMP): | ||
50 | '''Testing a qcow2 version 3 image with lazy refcounts enabled, opening | ||
51 | with lazy refcounts disabled''' | ||
52 | - img_options = 'compat=1.1,lazy_refcounts=on,compression_type=zstd' | ||
53 | + if supports_qcow2_zstd_compression(): | ||
54 | + compression_type = 'zstd' | ||
55 | + else: | ||
56 | + compression_type = 'zlib' | ||
57 | + | ||
58 | + img_options = 'compat=1.1,lazy_refcounts=on' | ||
59 | + img_options += f',compression_type={compression_type}' | ||
60 | qemu_options = 'lazy-refcounts=off' | ||
61 | compare = { 'compat': '1.1', 'lazy-refcounts': True, | ||
62 | 'refcount-bits': 16, 'corrupt': False, | ||
63 | - 'compression-type': 'zstd', 'extended-l2': False } | ||
64 | + 'compression-type': compression_type, 'extended-l2': False } | ||
65 | |||
66 | TestImageInfoSpecific = None | ||
67 | TestQemuImgInfo = None | ||
68 | -- | ||
69 | 2.35.1 | diff view generated by jsdifflib |
1 | From: Akihiko Odaki <akihiko.odaki@gmail.com> | 1 | 303 runs two test cases, one of which requires zstd support. |
---|---|---|---|
2 | Unfortunately, given that this is not a unittest-style test, we cannot | ||
3 | easily skip that single case, and instead can only skip the whole test. | ||
2 | 4 | ||
3 | backend_defaults property allows users to control whether default block | 5 | (Alternatively, we could split this test into a zlib and a zstd part, |
4 | properties should be decided with backend information. | 6 | but that seems excessive, given that this test is not in auto and thus |
7 | likely only run by developers who have zstd support compiled in.) | ||
5 | 8 | ||
6 | If it is off, any backend information will be discarded, which is | 9 | Fixes: 677e0bae686e7c670a71d1f ("iotest 303: explicit compression type") |
7 | suitable if you plan to perform live migration to a different disk backend. | 10 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> |
11 | Reviewed-by: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru> | ||
12 | Message-Id: <20220323105522.53660-4-hreitz@redhat.com> | ||
13 | --- | ||
14 | tests/qemu-iotests/303 | 4 +++- | ||
15 | 1 file changed, 3 insertions(+), 1 deletion(-) | ||
8 | 16 | ||
9 | If it is on, a block device may utilize backend information more | 17 | diff --git a/tests/qemu-iotests/303 b/tests/qemu-iotests/303 |
10 | aggressively. | 18 | index XXXXXXX..XXXXXXX 100755 |
11 | 19 | --- a/tests/qemu-iotests/303 | |
12 | By default, it is auto, which uses backend information for block | 20 | +++ b/tests/qemu-iotests/303 |
13 | sizes and ignores the others, which is consistent with the older | ||
14 | versions. | ||
15 | |||
16 | Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com> | ||
17 | Message-id: 20210705130458.97642-2-akihiko.odaki@gmail.com | ||
18 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
19 | --- | ||
20 | include/hw/block/block.h | 3 +++ | ||
21 | hw/block/block.c | 42 ++++++++++++++++++++++++++++++++++---- | ||
22 | tests/qemu-iotests/172.out | 38 ++++++++++++++++++++++++++++++++++ | ||
23 | 3 files changed, 79 insertions(+), 4 deletions(-) | ||
24 | |||
25 | diff --git a/include/hw/block/block.h b/include/hw/block/block.h | ||
26 | index XXXXXXX..XXXXXXX 100644 | ||
27 | --- a/include/hw/block/block.h | ||
28 | +++ b/include/hw/block/block.h | ||
29 | @@ -XXX,XX +XXX,XX @@ | 21 | @@ -XXX,XX +XXX,XX @@ |
30 | 22 | ||
31 | typedef struct BlockConf { | 23 | import iotests |
32 | BlockBackend *blk; | 24 | import subprocess |
33 | + OnOffAuto backend_defaults; | 25 | -from iotests import qemu_img_create, qemu_io, file_path, log, filter_qemu_io |
34 | uint32_t physical_block_size; | 26 | +from iotests import qemu_img_create, qemu_io, file_path, log, filter_qemu_io, \ |
35 | uint32_t logical_block_size; | 27 | + verify_qcow2_zstd_compression |
36 | uint32_t min_io_size; | 28 | |
37 | @@ -XXX,XX +XXX,XX @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) | 29 | iotests.script_initialize(supported_fmts=['qcow2'], |
38 | } | 30 | unsupported_imgopts=['refcount_bits', 'compat']) |
39 | 31 | +verify_qcow2_zstd_compression() | |
40 | #define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \ | 32 | |
41 | + DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state, \ | 33 | disk = file_path('disk') |
42 | + _conf.backend_defaults, ON_OFF_AUTO_AUTO), \ | 34 | chunk = 1024 * 1024 |
43 | DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \ | ||
44 | _conf.logical_block_size), \ | ||
45 | DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ | ||
46 | diff --git a/hw/block/block.c b/hw/block/block.c | ||
47 | index XXXXXXX..XXXXXXX 100644 | ||
48 | --- a/hw/block/block.c | ||
49 | +++ b/hw/block/block.c | ||
50 | @@ -XXX,XX +XXX,XX @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp) | ||
51 | { | ||
52 | BlockBackend *blk = conf->blk; | ||
53 | BlockSizes blocksizes; | ||
54 | - int backend_ret; | ||
55 | + BlockDriverState *bs; | ||
56 | + bool use_blocksizes; | ||
57 | + bool use_bs; | ||
58 | + | ||
59 | + switch (conf->backend_defaults) { | ||
60 | + case ON_OFF_AUTO_AUTO: | ||
61 | + use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes); | ||
62 | + use_bs = false; | ||
63 | + break; | ||
64 | + | ||
65 | + case ON_OFF_AUTO_ON: | ||
66 | + use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes); | ||
67 | + bs = blk_bs(blk); | ||
68 | + use_bs = bs; | ||
69 | + break; | ||
70 | + | ||
71 | + case ON_OFF_AUTO_OFF: | ||
72 | + use_blocksizes = false; | ||
73 | + use_bs = false; | ||
74 | + break; | ||
75 | + | ||
76 | + default: | ||
77 | + abort(); | ||
78 | + } | ||
79 | |||
80 | - backend_ret = blk_probe_blocksizes(blk, &blocksizes); | ||
81 | /* fill in detected values if they are not defined via qemu command line */ | ||
82 | if (!conf->physical_block_size) { | ||
83 | - if (!backend_ret) { | ||
84 | + if (use_blocksizes) { | ||
85 | conf->physical_block_size = blocksizes.phys; | ||
86 | } else { | ||
87 | conf->physical_block_size = BDRV_SECTOR_SIZE; | ||
88 | } | ||
89 | } | ||
90 | if (!conf->logical_block_size) { | ||
91 | - if (!backend_ret) { | ||
92 | + if (use_blocksizes) { | ||
93 | conf->logical_block_size = blocksizes.log; | ||
94 | } else { | ||
95 | conf->logical_block_size = BDRV_SECTOR_SIZE; | ||
96 | } | ||
97 | } | ||
98 | + if (use_bs) { | ||
99 | + if (!conf->opt_io_size) { | ||
100 | + conf->opt_io_size = bs->bl.opt_transfer; | ||
101 | + } | ||
102 | + if (conf->discard_granularity == -1) { | ||
103 | + if (bs->bl.pdiscard_alignment) { | ||
104 | + conf->discard_granularity = bs->bl.pdiscard_alignment; | ||
105 | + } else if (bs->bl.request_alignment != 1) { | ||
106 | + conf->discard_granularity = bs->bl.request_alignment; | ||
107 | + } | ||
108 | + } | ||
109 | + } | ||
110 | |||
111 | if (conf->logical_block_size > conf->physical_block_size) { | ||
112 | error_setg(errp, | ||
113 | diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out | ||
114 | index XXXXXXX..XXXXXXX 100644 | ||
115 | --- a/tests/qemu-iotests/172.out | ||
116 | +++ b/tests/qemu-iotests/172.out | ||
117 | @@ -XXX,XX +XXX,XX @@ Testing: | ||
118 | dev: floppy, id "" | ||
119 | unit = 0 (0x0) | ||
120 | drive = "floppy0" | ||
121 | + backend_defaults = "auto" | ||
122 | logical_block_size = 512 (512 B) | ||
123 | physical_block_size = 512 (512 B) | ||
124 | min_io_size = 0 (0 B) | ||
125 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 | ||
126 | dev: floppy, id "" | ||
127 | unit = 0 (0x0) | ||
128 | drive = "floppy0" | ||
129 | + backend_defaults = "auto" | ||
130 | logical_block_size = 512 (512 B) | ||
131 | physical_block_size = 512 (512 B) | ||
132 | min_io_size = 0 (0 B) | ||
133 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 | ||
134 | dev: floppy, id "" | ||
135 | unit = 1 (0x1) | ||
136 | drive = "floppy1" | ||
137 | + backend_defaults = "auto" | ||
138 | logical_block_size = 512 (512 B) | ||
139 | physical_block_size = 512 (512 B) | ||
140 | min_io_size = 0 (0 B) | ||
141 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 | ||
142 | dev: floppy, id "" | ||
143 | unit = 0 (0x0) | ||
144 | drive = "floppy0" | ||
145 | + backend_defaults = "auto" | ||
146 | logical_block_size = 512 (512 B) | ||
147 | physical_block_size = 512 (512 B) | ||
148 | min_io_size = 0 (0 B) | ||
149 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2 | ||
150 | dev: floppy, id "" | ||
151 | unit = 1 (0x1) | ||
152 | drive = "floppy1" | ||
153 | + backend_defaults = "auto" | ||
154 | logical_block_size = 512 (512 B) | ||
155 | physical_block_size = 512 (512 B) | ||
156 | min_io_size = 0 (0 B) | ||
157 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2 | ||
158 | dev: floppy, id "" | ||
159 | unit = 0 (0x0) | ||
160 | drive = "floppy0" | ||
161 | + backend_defaults = "auto" | ||
162 | logical_block_size = 512 (512 B) | ||
163 | physical_block_size = 512 (512 B) | ||
164 | min_io_size = 0 (0 B) | ||
165 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb | ||
166 | dev: floppy, id "" | ||
167 | unit = 1 (0x1) | ||
168 | drive = "floppy1" | ||
169 | + backend_defaults = "auto" | ||
170 | logical_block_size = 512 (512 B) | ||
171 | physical_block_size = 512 (512 B) | ||
172 | min_io_size = 0 (0 B) | ||
173 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb | ||
174 | dev: floppy, id "" | ||
175 | unit = 0 (0x0) | ||
176 | drive = "floppy0" | ||
177 | + backend_defaults = "auto" | ||
178 | logical_block_size = 512 (512 B) | ||
179 | physical_block_size = 512 (512 B) | ||
180 | min_io_size = 0 (0 B) | ||
181 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 | ||
182 | dev: floppy, id "" | ||
183 | unit = 0 (0x0) | ||
184 | drive = "floppy0" | ||
185 | + backend_defaults = "auto" | ||
186 | logical_block_size = 512 (512 B) | ||
187 | physical_block_size = 512 (512 B) | ||
188 | min_io_size = 0 (0 B) | ||
189 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 | ||
190 | dev: floppy, id "" | ||
191 | unit = 1 (0x1) | ||
192 | drive = "floppy1" | ||
193 | + backend_defaults = "auto" | ||
194 | logical_block_size = 512 (512 B) | ||
195 | physical_block_size = 512 (512 B) | ||
196 | min_io_size = 0 (0 B) | ||
197 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 | ||
198 | dev: floppy, id "" | ||
199 | unit = 0 (0x0) | ||
200 | drive = "floppy0" | ||
201 | + backend_defaults = "auto" | ||
202 | logical_block_size = 512 (512 B) | ||
203 | physical_block_size = 512 (512 B) | ||
204 | min_io_size = 0 (0 B) | ||
205 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t | ||
206 | dev: floppy, id "" | ||
207 | unit = 1 (0x1) | ||
208 | drive = "floppy1" | ||
209 | + backend_defaults = "auto" | ||
210 | logical_block_size = 512 (512 B) | ||
211 | physical_block_size = 512 (512 B) | ||
212 | min_io_size = 0 (0 B) | ||
213 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t | ||
214 | dev: floppy, id "" | ||
215 | unit = 0 (0x0) | ||
216 | drive = "floppy0" | ||
217 | + backend_defaults = "auto" | ||
218 | logical_block_size = 512 (512 B) | ||
219 | physical_block_size = 512 (512 B) | ||
220 | min_io_size = 0 (0 B) | ||
221 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 | ||
222 | dev: floppy, id "" | ||
223 | unit = 0 (0x0) | ||
224 | drive = "none0" | ||
225 | + backend_defaults = "auto" | ||
226 | logical_block_size = 512 (512 B) | ||
227 | physical_block_size = 512 (512 B) | ||
228 | min_io_size = 0 (0 B) | ||
229 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 | ||
230 | dev: floppy, id "" | ||
231 | unit = 1 (0x1) | ||
232 | drive = "none0" | ||
233 | + backend_defaults = "auto" | ||
234 | logical_block_size = 512 (512 B) | ||
235 | physical_block_size = 512 (512 B) | ||
236 | min_io_size = 0 (0 B) | ||
237 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco | ||
238 | dev: floppy, id "" | ||
239 | unit = 1 (0x1) | ||
240 | drive = "none1" | ||
241 | + backend_defaults = "auto" | ||
242 | logical_block_size = 512 (512 B) | ||
243 | physical_block_size = 512 (512 B) | ||
244 | min_io_size = 0 (0 B) | ||
245 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco | ||
246 | dev: floppy, id "" | ||
247 | unit = 0 (0x0) | ||
248 | drive = "none0" | ||
249 | + backend_defaults = "auto" | ||
250 | logical_block_size = 512 (512 B) | ||
251 | physical_block_size = 512 (512 B) | ||
252 | min_io_size = 0 (0 B) | ||
253 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
254 | dev: floppy, id "" | ||
255 | unit = 1 (0x1) | ||
256 | drive = "none0" | ||
257 | + backend_defaults = "auto" | ||
258 | logical_block_size = 512 (512 B) | ||
259 | physical_block_size = 512 (512 B) | ||
260 | min_io_size = 0 (0 B) | ||
261 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
262 | dev: floppy, id "" | ||
263 | unit = 0 (0x0) | ||
264 | drive = "floppy0" | ||
265 | + backend_defaults = "auto" | ||
266 | logical_block_size = 512 (512 B) | ||
267 | physical_block_size = 512 (512 B) | ||
268 | min_io_size = 0 (0 B) | ||
269 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
270 | dev: floppy, id "" | ||
271 | unit = 1 (0x1) | ||
272 | drive = "none0" | ||
273 | + backend_defaults = "auto" | ||
274 | logical_block_size = 512 (512 B) | ||
275 | physical_block_size = 512 (512 B) | ||
276 | min_io_size = 0 (0 B) | ||
277 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
278 | dev: floppy, id "" | ||
279 | unit = 0 (0x0) | ||
280 | drive = "floppy0" | ||
281 | + backend_defaults = "auto" | ||
282 | logical_block_size = 512 (512 B) | ||
283 | physical_block_size = 512 (512 B) | ||
284 | min_io_size = 0 (0 B) | ||
285 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
286 | dev: floppy, id "" | ||
287 | unit = 0 (0x0) | ||
288 | drive = "none0" | ||
289 | + backend_defaults = "auto" | ||
290 | logical_block_size = 512 (512 B) | ||
291 | physical_block_size = 512 (512 B) | ||
292 | min_io_size = 0 (0 B) | ||
293 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
294 | dev: floppy, id "" | ||
295 | unit = 1 (0x1) | ||
296 | drive = "floppy1" | ||
297 | + backend_defaults = "auto" | ||
298 | logical_block_size = 512 (512 B) | ||
299 | physical_block_size = 512 (512 B) | ||
300 | min_io_size = 0 (0 B) | ||
301 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
302 | dev: floppy, id "" | ||
303 | unit = 0 (0x0) | ||
304 | drive = "none0" | ||
305 | + backend_defaults = "auto" | ||
306 | logical_block_size = 512 (512 B) | ||
307 | physical_block_size = 512 (512 B) | ||
308 | min_io_size = 0 (0 B) | ||
309 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl | ||
310 | dev: floppy, id "" | ||
311 | unit = 1 (0x1) | ||
312 | drive = "floppy1" | ||
313 | + backend_defaults = "auto" | ||
314 | logical_block_size = 512 (512 B) | ||
315 | physical_block_size = 512 (512 B) | ||
316 | min_io_size = 0 (0 B) | ||
317 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q | ||
318 | dev: floppy, id "" | ||
319 | unit = 1 (0x1) | ||
320 | drive = "none0" | ||
321 | + backend_defaults = "auto" | ||
322 | logical_block_size = 512 (512 B) | ||
323 | physical_block_size = 512 (512 B) | ||
324 | min_io_size = 0 (0 B) | ||
325 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q | ||
326 | dev: floppy, id "" | ||
327 | unit = 0 (0x0) | ||
328 | drive = "floppy0" | ||
329 | + backend_defaults = "auto" | ||
330 | logical_block_size = 512 (512 B) | ||
331 | physical_block_size = 512 (512 B) | ||
332 | min_io_size = 0 (0 B) | ||
333 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q | ||
334 | dev: floppy, id "" | ||
335 | unit = 1 (0x1) | ||
336 | drive = "none0" | ||
337 | + backend_defaults = "auto" | ||
338 | logical_block_size = 512 (512 B) | ||
339 | physical_block_size = 512 (512 B) | ||
340 | min_io_size = 0 (0 B) | ||
341 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q | ||
342 | dev: floppy, id "" | ||
343 | unit = 0 (0x0) | ||
344 | drive = "floppy0" | ||
345 | + backend_defaults = "auto" | ||
346 | logical_block_size = 512 (512 B) | ||
347 | physical_block_size = 512 (512 B) | ||
348 | min_io_size = 0 (0 B) | ||
349 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global floppy.drive=none0 -device | ||
350 | dev: floppy, id "" | ||
351 | unit = 0 (0x0) | ||
352 | drive = "none0" | ||
353 | + backend_defaults = "auto" | ||
354 | logical_block_size = 512 (512 B) | ||
355 | physical_block_size = 512 (512 B) | ||
356 | min_io_size = 0 (0 B) | ||
357 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy | ||
358 | dev: floppy, id "" | ||
359 | unit = 0 (0x0) | ||
360 | drive = "" | ||
361 | + backend_defaults = "auto" | ||
362 | logical_block_size = 512 (512 B) | ||
363 | physical_block_size = 512 (512 B) | ||
364 | min_io_size = 0 (0 B) | ||
365 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=120 | ||
366 | dev: floppy, id "" | ||
367 | unit = 0 (0x0) | ||
368 | drive = "" | ||
369 | + backend_defaults = "auto" | ||
370 | logical_block_size = 512 (512 B) | ||
371 | physical_block_size = 512 (512 B) | ||
372 | min_io_size = 0 (0 B) | ||
373 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=144 | ||
374 | dev: floppy, id "" | ||
375 | unit = 0 (0x0) | ||
376 | drive = "" | ||
377 | + backend_defaults = "auto" | ||
378 | logical_block_size = 512 (512 B) | ||
379 | physical_block_size = 512 (512 B) | ||
380 | min_io_size = 0 (0 B) | ||
381 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=288 | ||
382 | dev: floppy, id "" | ||
383 | unit = 0 (0x0) | ||
384 | drive = "" | ||
385 | + backend_defaults = "auto" | ||
386 | logical_block_size = 512 (512 B) | ||
387 | physical_block_size = 512 (512 B) | ||
388 | min_io_size = 0 (0 B) | ||
389 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t | ||
390 | dev: floppy, id "" | ||
391 | unit = 0 (0x0) | ||
392 | drive = "none0" | ||
393 | + backend_defaults = "auto" | ||
394 | logical_block_size = 512 (512 B) | ||
395 | physical_block_size = 512 (512 B) | ||
396 | min_io_size = 0 (0 B) | ||
397 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t | ||
398 | dev: floppy, id "" | ||
399 | unit = 0 (0x0) | ||
400 | drive = "none0" | ||
401 | + backend_defaults = "auto" | ||
402 | logical_block_size = 512 (512 B) | ||
403 | physical_block_size = 512 (512 B) | ||
404 | min_io_size = 0 (0 B) | ||
405 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical | ||
406 | dev: floppy, id "" | ||
407 | unit = 0 (0x0) | ||
408 | drive = "none0" | ||
409 | + backend_defaults = "auto" | ||
410 | logical_block_size = 512 (512 B) | ||
411 | physical_block_size = 512 (512 B) | ||
412 | min_io_size = 0 (0 B) | ||
413 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica | ||
414 | dev: floppy, id "" | ||
415 | unit = 0 (0x0) | ||
416 | drive = "none0" | ||
417 | + backend_defaults = "auto" | ||
418 | logical_block_size = 512 (512 B) | ||
419 | physical_block_size = 512 (512 B) | ||
420 | min_io_size = 0 (0 B) | ||
421 | -- | 35 | -- |
422 | 2.31.1 | 36 | 2.35.1 |
423 | diff view generated by jsdifflib |
1 | From: Akihiko Odaki <akihiko.odaki@gmail.com> | 1 | When rebuilding the refcount structures (when qemu-img check -r found |
---|---|---|---|
2 | errors with refcount = 0, but reference count > 0), the new refcount | ||
3 | table defaults to being put at the image file end[1]. There is no good | ||
4 | reason for that except that it means we will not have to rewrite any | ||
5 | refblocks we already wrote to disk. | ||
2 | 6 | ||
3 | This commit introduces "punch hole" operation and optimizes transfer | 7 | Changing the code to rewrite those refblocks is not too difficult, |
4 | block size for macOS. | 8 | though, so let us do that. That is beneficial for images on block |
9 | devices, where we cannot really write beyond the end of the image file. | ||
5 | 10 | ||
6 | Thanks to Konstantin Nazarov for detailed analysis of a flaw in an | 11 | Use this opportunity to add extensive comments to the code, and refactor |
7 | old version of this change: | 12 | it a bit, getting rid of the backwards-jumping goto. |
8 | https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667 | ||
9 | 13 | ||
10 | Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com> | 14 | [1] Unless there is something allocated in the area pointed to by the |
11 | Message-id: 20210705130458.97642-1-akihiko.odaki@gmail.com | 15 | last refblock, so we have to write that refblock. In that case, we |
12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 16 | try to put the reftable in there. |
17 | |||
18 | Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071 | ||
19 | Closes: https://gitlab.com/qemu-project/qemu/-/issues/941 | ||
20 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
21 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> | ||
22 | Message-Id: <20220405134652.19278-2-hreitz@redhat.com> | ||
13 | --- | 23 | --- |
14 | block/file-posix.c | 27 +++++++++++++++++++++++++-- | 24 | block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------ |
15 | 1 file changed, 25 insertions(+), 2 deletions(-) | 25 | 1 file changed, 235 insertions(+), 97 deletions(-) |
16 | 26 | ||
17 | diff --git a/block/file-posix.c b/block/file-posix.c | 27 | diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c |
18 | index XXXXXXX..XXXXXXX 100644 | 28 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/file-posix.c | 29 | --- a/block/qcow2-refcount.c |
20 | +++ b/block/file-posix.c | 30 | +++ b/block/qcow2-refcount.c |
21 | @@ -XXX,XX +XXX,XX @@ | 31 | @@ -XXX,XX +XXX,XX @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, |
22 | #if defined(HAVE_HOST_BLOCK_DEVICE) | 32 | } |
23 | #include <paths.h> | 33 | |
24 | #include <sys/param.h> | 34 | /* |
25 | +#include <sys/mount.h> | 35 | - * Creates a new refcount structure based solely on the in-memory information |
26 | #include <IOKit/IOKitLib.h> | 36 | - * given through *refcount_table. All necessary allocations will be reflected |
27 | #include <IOKit/IOBSD.h> | 37 | - * in that array. |
28 | #include <IOKit/storage/IOMediaBSDClient.h> | 38 | + * Helper function for rebuild_refcount_structure(). |
29 | @@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) | 39 | * |
30 | return; | 40 | - * On success, the old refcount structure is leaked (it will be covered by the |
41 | - * new refcount structure). | ||
42 | + * Scan the range of clusters [first_cluster, end_cluster) for allocated | ||
43 | + * clusters and write all corresponding refblocks to disk. The refblock | ||
44 | + * and allocation data is taken from the in-memory refcount table | ||
45 | + * *refcount_table[] (of size *nb_clusters), which is basically one big | ||
46 | + * (unlimited size) refblock for the whole image. | ||
47 | + * | ||
48 | + * For these refblocks, clusters are allocated using said in-memory | ||
49 | + * refcount table. Care is taken that these allocations are reflected | ||
50 | + * in the refblocks written to disk. | ||
51 | + * | ||
52 | + * The refblocks' offsets are written into a reftable, which is | ||
53 | + * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If | ||
54 | + * that reftable is of insufficient size, it will be resized to fit. | ||
55 | + * This reftable is not written to disk. | ||
56 | + * | ||
57 | + * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed | ||
58 | + * to point to existing valid refblocks that do not need to be allocated | ||
59 | + * again.) | ||
60 | + * | ||
61 | + * Return whether the on-disk reftable array was resized (true/false), | ||
62 | + * or -errno on error. | ||
63 | */ | ||
64 | -static int rebuild_refcount_structure(BlockDriverState *bs, | ||
65 | - BdrvCheckResult *res, | ||
66 | - void **refcount_table, | ||
67 | - int64_t *nb_clusters) | ||
68 | +static int rebuild_refcounts_write_refblocks( | ||
69 | + BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, | ||
70 | + int64_t first_cluster, int64_t end_cluster, | ||
71 | + uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr | ||
72 | + ) | ||
73 | { | ||
74 | BDRVQcow2State *s = bs->opaque; | ||
75 | - int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; | ||
76 | + int64_t cluster; | ||
77 | int64_t refblock_offset, refblock_start, refblock_index; | ||
78 | - uint32_t reftable_size = 0; | ||
79 | - uint64_t *on_disk_reftable = NULL; | ||
80 | + int64_t first_free_cluster = 0; | ||
81 | + uint64_t *on_disk_reftable = *on_disk_reftable_ptr; | ||
82 | + uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr; | ||
83 | void *on_disk_refblock; | ||
84 | - int ret = 0; | ||
85 | - struct { | ||
86 | - uint64_t reftable_offset; | ||
87 | - uint32_t reftable_clusters; | ||
88 | - } QEMU_PACKED reftable_offset_and_clusters; | ||
89 | - | ||
90 | - qcow2_cache_empty(bs, s->refcount_block_cache); | ||
91 | + bool reftable_grown = false; | ||
92 | + int ret; | ||
93 | |||
94 | -write_refblocks: | ||
95 | - for (; cluster < *nb_clusters; cluster++) { | ||
96 | + for (cluster = first_cluster; cluster < end_cluster; cluster++) { | ||
97 | + /* Check all clusters to find refblocks that contain non-zero entries */ | ||
98 | if (!s->get_refcount(*refcount_table, cluster)) { | ||
99 | continue; | ||
100 | } | ||
101 | |||
102 | + /* | ||
103 | + * This cluster is allocated, so we need to create a refblock | ||
104 | + * for it. The data we will write to disk is just the | ||
105 | + * respective slice from *refcount_table, so it will contain | ||
106 | + * accurate refcounts for all clusters belonging to this | ||
107 | + * refblock. After we have written it, we will therefore skip | ||
108 | + * all remaining clusters in this refblock. | ||
109 | + */ | ||
110 | + | ||
111 | refblock_index = cluster >> s->refcount_block_bits; | ||
112 | refblock_start = refblock_index << s->refcount_block_bits; | ||
113 | |||
114 | - /* Don't allocate a cluster in a refblock already written to disk */ | ||
115 | - if (first_free_cluster < refblock_start) { | ||
116 | - first_free_cluster = refblock_start; | ||
117 | - } | ||
118 | - refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, | ||
119 | - nb_clusters, &first_free_cluster); | ||
120 | - if (refblock_offset < 0) { | ||
121 | - fprintf(stderr, "ERROR allocating refblock: %s\n", | ||
122 | - strerror(-refblock_offset)); | ||
123 | - res->check_errors++; | ||
124 | - ret = refblock_offset; | ||
125 | - goto fail; | ||
126 | - } | ||
127 | + if (on_disk_reftable_entries > refblock_index && | ||
128 | + on_disk_reftable[refblock_index]) | ||
129 | + { | ||
130 | + /* | ||
131 | + * We can get here on a repeated call of this function: We | ||
132 | + * have a reftable from a previous run, and the refblock is | ||
133 | + * already allocated. No need to allocate it again. | ||
134 | + */ | ||
135 | + refblock_offset = on_disk_reftable[refblock_index]; | ||
136 | + } else { | ||
137 | + int64_t refblock_cluster_index; | ||
138 | |||
139 | - if (reftable_size <= refblock_index) { | ||
140 | - uint32_t old_reftable_size = reftable_size; | ||
141 | - uint64_t *new_on_disk_reftable; | ||
142 | + /* Don't allocate a cluster in a refblock already written to disk */ | ||
143 | + if (first_free_cluster < refblock_start) { | ||
144 | + first_free_cluster = refblock_start; | ||
145 | + } | ||
146 | + refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, | ||
147 | + nb_clusters, | ||
148 | + &first_free_cluster); | ||
149 | + if (refblock_offset < 0) { | ||
150 | + fprintf(stderr, "ERROR allocating refblock: %s\n", | ||
151 | + strerror(-refblock_offset)); | ||
152 | + return refblock_offset; | ||
153 | + } | ||
154 | |||
155 | - reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, | ||
156 | - s->cluster_size) / REFTABLE_ENTRY_SIZE; | ||
157 | - new_on_disk_reftable = g_try_realloc(on_disk_reftable, | ||
158 | - reftable_size * | ||
159 | - REFTABLE_ENTRY_SIZE); | ||
160 | - if (!new_on_disk_reftable) { | ||
161 | - res->check_errors++; | ||
162 | - ret = -ENOMEM; | ||
163 | - goto fail; | ||
164 | + refblock_cluster_index = refblock_offset / s->cluster_size; | ||
165 | + if (refblock_cluster_index >= end_cluster) { | ||
166 | + /* | ||
167 | + * We must write the refblock that holds this refblock's | ||
168 | + * refcount | ||
169 | + */ | ||
170 | + end_cluster = refblock_cluster_index + 1; | ||
171 | } | ||
172 | - on_disk_reftable = new_on_disk_reftable; | ||
173 | |||
174 | - memset(on_disk_reftable + old_reftable_size, 0, | ||
175 | - (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE); | ||
176 | + if (on_disk_reftable_entries <= refblock_index) { | ||
177 | + on_disk_reftable_entries = | ||
178 | + ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, | ||
179 | + s->cluster_size) / REFTABLE_ENTRY_SIZE; | ||
180 | + on_disk_reftable = | ||
181 | + g_try_realloc(on_disk_reftable, | ||
182 | + on_disk_reftable_entries * | ||
183 | + REFTABLE_ENTRY_SIZE); | ||
184 | + if (!on_disk_reftable) { | ||
185 | + return -ENOMEM; | ||
186 | + } | ||
187 | |||
188 | - /* The offset we have for the reftable is now no longer valid; | ||
189 | - * this will leak that range, but we can easily fix that by running | ||
190 | - * a leak-fixing check after this rebuild operation */ | ||
191 | - reftable_offset = -1; | ||
192 | - } else { | ||
193 | - assert(on_disk_reftable); | ||
194 | - } | ||
195 | - on_disk_reftable[refblock_index] = refblock_offset; | ||
196 | + memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0, | ||
197 | + (on_disk_reftable_entries - | ||
198 | + *on_disk_reftable_entries_ptr) * | ||
199 | + REFTABLE_ENTRY_SIZE); | ||
200 | |||
201 | - /* If this is apparently the last refblock (for now), try to squeeze the | ||
202 | - * reftable in */ | ||
203 | - if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && | ||
204 | - reftable_offset < 0) | ||
205 | - { | ||
206 | - uint64_t reftable_clusters = size_to_clusters(s, reftable_size * | ||
207 | - REFTABLE_ENTRY_SIZE); | ||
208 | - reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, | ||
209 | - refcount_table, nb_clusters, | ||
210 | - &first_free_cluster); | ||
211 | - if (reftable_offset < 0) { | ||
212 | - fprintf(stderr, "ERROR allocating reftable: %s\n", | ||
213 | - strerror(-reftable_offset)); | ||
214 | - res->check_errors++; | ||
215 | - ret = reftable_offset; | ||
216 | - goto fail; | ||
217 | + *on_disk_reftable_ptr = on_disk_reftable; | ||
218 | + *on_disk_reftable_entries_ptr = on_disk_reftable_entries; | ||
219 | + | ||
220 | + reftable_grown = true; | ||
221 | + } else { | ||
222 | + assert(on_disk_reftable); | ||
223 | } | ||
224 | + on_disk_reftable[refblock_index] = refblock_offset; | ||
225 | } | ||
226 | |||
227 | + /* Refblock is allocated, write it to disk */ | ||
228 | + | ||
229 | ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, | ||
230 | s->cluster_size, false); | ||
231 | if (ret < 0) { | ||
232 | fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); | ||
233 | - goto fail; | ||
234 | + return ret; | ||
235 | } | ||
236 | |||
237 | - /* The size of *refcount_table is always cluster-aligned, therefore the | ||
238 | - * write operation will not overflow */ | ||
239 | + /* | ||
240 | + * The refblock is simply a slice of *refcount_table. | ||
241 | + * Note that the size of *refcount_table is always aligned to | ||
242 | + * whole clusters, so the write operation will not result in | ||
243 | + * out-of-bounds accesses. | ||
244 | + */ | ||
245 | on_disk_refblock = (void *)((char *) *refcount_table + | ||
246 | refblock_index * s->cluster_size); | ||
247 | |||
248 | @@ -XXX,XX +XXX,XX @@ write_refblocks: | ||
249 | s->cluster_size); | ||
250 | if (ret < 0) { | ||
251 | fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); | ||
252 | - goto fail; | ||
253 | + return ret; | ||
254 | } | ||
255 | |||
256 | - /* Go to the end of this refblock */ | ||
257 | + /* This refblock is done, skip to its end */ | ||
258 | cluster = refblock_start + s->refcount_block_size - 1; | ||
31 | } | 259 | } |
32 | 260 | ||
33 | +#if defined(__APPLE__) && (__MACH__) | 261 | - if (reftable_offset < 0) { |
34 | + struct statfs buf; | 262 | - uint64_t post_refblock_start, reftable_clusters; |
35 | + | 263 | + return reftable_grown; |
36 | + if (!fstatfs(s->fd, &buf)) { | 264 | +} |
37 | + bs->bl.opt_transfer = buf.f_iosize; | 265 | + |
38 | + bs->bl.pdiscard_alignment = buf.f_bsize; | 266 | +/* |
267 | + * Creates a new refcount structure based solely on the in-memory information | ||
268 | + * given through *refcount_table (this in-memory information is basically just | ||
269 | + * the concatenation of all refblocks). All necessary allocations will be | ||
270 | + * reflected in that array. | ||
271 | + * | ||
272 | + * On success, the old refcount structure is leaked (it will be covered by the | ||
273 | + * new refcount structure). | ||
274 | + */ | ||
275 | +static int rebuild_refcount_structure(BlockDriverState *bs, | ||
276 | + BdrvCheckResult *res, | ||
277 | + void **refcount_table, | ||
278 | + int64_t *nb_clusters) | ||
279 | +{ | ||
280 | + BDRVQcow2State *s = bs->opaque; | ||
281 | + int64_t reftable_offset = -1; | ||
282 | + int64_t reftable_length = 0; | ||
283 | + int64_t reftable_clusters; | ||
284 | + int64_t refblock_index; | ||
285 | + uint32_t on_disk_reftable_entries = 0; | ||
286 | + uint64_t *on_disk_reftable = NULL; | ||
287 | + int ret = 0; | ||
288 | + int reftable_size_changed = 0; | ||
289 | + struct { | ||
290 | + uint64_t reftable_offset; | ||
291 | + uint32_t reftable_clusters; | ||
292 | + } QEMU_PACKED reftable_offset_and_clusters; | ||
293 | + | ||
294 | + qcow2_cache_empty(bs, s->refcount_block_cache); | ||
295 | + | ||
296 | + /* | ||
297 | + * For each refblock containing entries, we try to allocate a | ||
298 | + * cluster (in the in-memory refcount table) and write its offset | ||
299 | + * into on_disk_reftable[]. We then write the whole refblock to | ||
300 | + * disk (as a slice of the in-memory refcount table). | ||
301 | + * This is done by rebuild_refcounts_write_refblocks(). | ||
302 | + * | ||
303 | + * Once we have scanned all clusters, we try to find space for the | ||
304 | + * reftable. This will dirty the in-memory refcount table (i.e. | ||
305 | + * make it differ from the refblocks we have already written), so we | ||
306 | + * need to run rebuild_refcounts_write_refblocks() again for the | ||
307 | + * range of clusters where the reftable has been allocated. | ||
308 | + * | ||
309 | + * This second run might make the reftable grow again, in which case | ||
310 | + * we will need to allocate another space for it, which is why we | ||
311 | + * repeat all this until the reftable stops growing. | ||
312 | + * | ||
313 | + * (This loop will terminate, because with every cluster the | ||
314 | + * reftable grows, it can accommodate a multitude of more refcounts, | ||
315 | + * so that at some point this must be able to cover the reftable | ||
316 | + * and all refblocks describing it.) | ||
317 | + * | ||
318 | + * We then convert the reftable to big-endian and write it to disk. | ||
319 | + * | ||
320 | + * Note that we never free any reftable allocations. Doing so would | ||
321 | + * needlessly complicate the algorithm: The eventual second check | ||
322 | + * run we do will clean up all leaks we have caused. | ||
323 | + */ | ||
324 | + | ||
325 | + reftable_size_changed = | ||
326 | + rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, | ||
327 | + 0, *nb_clusters, | ||
328 | + &on_disk_reftable, | ||
329 | + &on_disk_reftable_entries); | ||
330 | + if (reftable_size_changed < 0) { | ||
331 | + res->check_errors++; | ||
332 | + ret = reftable_size_changed; | ||
333 | + goto fail; | ||
39 | + } | 334 | + } |
40 | +#endif | 335 | + |
41 | + | 336 | + /* |
42 | if (bs->sg || S_ISBLK(st.st_mode)) { | 337 | + * There was no reftable before, so rebuild_refcounts_write_refblocks() |
43 | int ret = hdev_get_max_hw_transfer(s->fd, &st); | 338 | + * must have increased its size (from 0 to something). |
44 | 339 | + */ | |
45 | @@ -XXX,XX +XXX,XX @@ out: | 340 | + assert(reftable_size_changed); |
341 | + | ||
342 | + do { | ||
343 | + int64_t reftable_start_cluster, reftable_end_cluster; | ||
344 | + int64_t first_free_cluster = 0; | ||
345 | + | ||
346 | + reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE; | ||
347 | + reftable_clusters = size_to_clusters(s, reftable_length); | ||
348 | |||
349 | - post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); | ||
350 | - reftable_clusters = | ||
351 | - size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE); | ||
352 | - /* Not pretty but simple */ | ||
353 | - if (first_free_cluster < post_refblock_start) { | ||
354 | - first_free_cluster = post_refblock_start; | ||
355 | - } | ||
356 | reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, | ||
357 | refcount_table, nb_clusters, | ||
358 | &first_free_cluster); | ||
359 | @@ -XXX,XX +XXX,XX @@ write_refblocks: | ||
360 | goto fail; | ||
361 | } | ||
362 | |||
363 | - goto write_refblocks; | ||
364 | - } | ||
365 | + /* | ||
366 | + * We need to update the affected refblocks, so re-run the | ||
367 | + * write_refblocks loop for the reftable's range of clusters. | ||
368 | + */ | ||
369 | + assert(offset_into_cluster(s, reftable_offset) == 0); | ||
370 | + reftable_start_cluster = reftable_offset / s->cluster_size; | ||
371 | + reftable_end_cluster = reftable_start_cluster + reftable_clusters; | ||
372 | + reftable_size_changed = | ||
373 | + rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, | ||
374 | + reftable_start_cluster, | ||
375 | + reftable_end_cluster, | ||
376 | + &on_disk_reftable, | ||
377 | + &on_disk_reftable_entries); | ||
378 | + if (reftable_size_changed < 0) { | ||
379 | + res->check_errors++; | ||
380 | + ret = reftable_size_changed; | ||
381 | + goto fail; | ||
382 | + } | ||
383 | + | ||
384 | + /* | ||
385 | + * If the reftable size has changed, we will need to find a new | ||
386 | + * allocation, repeating the loop. | ||
387 | + */ | ||
388 | + } while (reftable_size_changed); | ||
389 | |||
390 | - for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { | ||
391 | + /* The above loop must have run at least once */ | ||
392 | + assert(reftable_offset >= 0); | ||
393 | + | ||
394 | + /* | ||
395 | + * All allocations are done, all refblocks are written, convert the | ||
396 | + * reftable to big-endian and write it to disk. | ||
397 | + */ | ||
398 | + | ||
399 | + for (refblock_index = 0; refblock_index < on_disk_reftable_entries; | ||
400 | + refblock_index++) | ||
401 | + { | ||
402 | cpu_to_be64s(&on_disk_reftable[refblock_index]); | ||
46 | } | 403 | } |
47 | } | 404 | |
48 | 405 | - ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, | |
49 | +#if defined(CONFIG_FALLOCATE) || defined(BLKZEROOUT) || defined(BLKDISCARD) | 406 | - reftable_size * REFTABLE_ENTRY_SIZE, |
50 | static int translate_err(int err) | 407 | + ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, |
51 | { | 408 | false); |
52 | if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || | 409 | if (ret < 0) { |
53 | @@ -XXX,XX +XXX,XX @@ static int translate_err(int err) | 410 | fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); |
411 | goto fail; | ||
54 | } | 412 | } |
55 | return err; | 413 | |
56 | } | 414 | - assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE); |
57 | +#endif | 415 | + assert(reftable_length < INT_MAX); |
58 | 416 | ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, | |
59 | #ifdef CONFIG_FALLOCATE | 417 | - reftable_size * REFTABLE_ENTRY_SIZE); |
60 | static int do_fallocate(int fd, int mode, off_t offset, off_t len) | 418 | + reftable_length); |
61 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_discard(void *opaque) | 419 | if (ret < 0) { |
62 | } | 420 | fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); |
63 | } while (errno == EINTR); | 421 | goto fail; |
64 | 422 | @@ -XXX,XX +XXX,XX @@ write_refblocks: | |
65 | - ret = -errno; | 423 | /* Enter new reftable into the image header */ |
66 | + ret = translate_err(-errno); | 424 | reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); |
67 | #endif | 425 | reftable_offset_and_clusters.reftable_clusters = |
68 | } else { | 426 | - cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE)); |
69 | #ifdef CONFIG_FALLOCATE_PUNCH_HOLE | 427 | + cpu_to_be32(reftable_clusters); |
70 | ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, | 428 | ret = bdrv_pwrite_sync(bs->file, |
71 | aiocb->aio_offset, aiocb->aio_nbytes); | 429 | offsetof(QCowHeader, refcount_table_offset), |
72 | + ret = translate_err(-errno); | 430 | &reftable_offset_and_clusters, |
73 | +#elif defined(__APPLE__) && (__MACH__) | 431 | @@ -XXX,XX +XXX,XX @@ write_refblocks: |
74 | + fpunchhole_t fpunchhole; | 432 | goto fail; |
75 | + fpunchhole.fp_flags = 0; | ||
76 | + fpunchhole.reserved = 0; | ||
77 | + fpunchhole.fp_offset = aiocb->aio_offset; | ||
78 | + fpunchhole.fp_length = aiocb->aio_nbytes; | ||
79 | + if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) { | ||
80 | + ret = errno == ENODEV ? -ENOTSUP : -errno; | ||
81 | + } else { | ||
82 | + ret = 0; | ||
83 | + } | ||
84 | #endif | ||
85 | } | 433 | } |
86 | 434 | ||
87 | - ret = translate_err(ret); | 435 | - for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { |
88 | if (ret == -ENOTSUP) { | 436 | + for (refblock_index = 0; refblock_index < on_disk_reftable_entries; |
89 | s->has_discard = false; | 437 | + refblock_index++) |
438 | + { | ||
439 | be64_to_cpus(&on_disk_reftable[refblock_index]); | ||
90 | } | 440 | } |
441 | s->refcount_table = on_disk_reftable; | ||
442 | s->refcount_table_offset = reftable_offset; | ||
443 | - s->refcount_table_size = reftable_size; | ||
444 | + s->refcount_table_size = on_disk_reftable_entries; | ||
445 | update_max_refcount_table_index(s); | ||
446 | |||
447 | return 0; | ||
91 | -- | 448 | -- |
92 | 2.31.1 | 449 | 2.35.1 |
93 | diff view generated by jsdifflib |
1 | From: Akihiko Odaki <akihiko.odaki@gmail.com> | 1 | One clear problem with how qcow2's refcount structure rebuild algorithm |
---|---|---|---|
2 | used to be before "qcow2: Improve refcount structure rebuilding" was | ||
3 | that it was prone to failure for qcow2 images on block devices: There is | ||
4 | generally unused space after the actual image, and if that exceeds what | ||
5 | one refblock covers, the old algorithm would invariably write the | ||
6 | reftable past the block device's end, which cannot work. The new | ||
7 | algorithm does not have this problem. | ||
2 | 8 | ||
3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com> | 9 | Test it with three tests: |
4 | Message-id: 20210705130458.97642-3-akihiko.odaki@gmail.com | 10 | (1) Create an image with more empty space at the end than what one |
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 11 | refblock covers, see whether rebuilding the refcount structures |
12 | results in a change in the image file length. (It should not.) | ||
13 | |||
14 | (2) Leave precisely enough space somewhere at the beginning of the image | ||
15 | for the new reftable (and the refblock for that place), see whether | ||
16 | the new algorithm puts the reftable there. (It should.) | ||
17 | |||
18 | (3) Test the original problem: Create (something like) a block device | ||
19 | with a fixed size, then create a qcow2 image in there, write some | ||
20 | data, and then have qemu-img check rebuild the refcount structures. | ||
21 | Before HEAD^, the reftable would have been written past the image | ||
22 | file end, i.e. outside of what the block device provides, which | ||
23 | cannot work. HEAD^ should have fixed that. | ||
24 | ("Something like a block device" means a loop device if we can use | ||
25 | one ("sudo -n losetup" works), or a FUSE block export with | ||
26 | growable=false otherwise.) | ||
27 | |||
28 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
29 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> | ||
30 | Message-Id: <20220405134652.19278-3-hreitz@redhat.com> | ||
6 | --- | 31 | --- |
7 | block/io.c | 2 ++ | 32 | tests/qemu-iotests/108 | 259 ++++++++++++++++++++++++++++++++++++- |
8 | 1 file changed, 2 insertions(+) | 33 | tests/qemu-iotests/108.out | 81 ++++++++++++ |
34 | 2 files changed, 339 insertions(+), 1 deletion(-) | ||
9 | 35 | ||
10 | diff --git a/block/io.c b/block/io.c | 36 | diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 |
37 | index XXXXXXX..XXXXXXX 100755 | ||
38 | --- a/tests/qemu-iotests/108 | ||
39 | +++ b/tests/qemu-iotests/108 | ||
40 | @@ -XXX,XX +XXX,XX @@ status=1 # failure is the default! | ||
41 | |||
42 | _cleanup() | ||
43 | { | ||
44 | - _cleanup_test_img | ||
45 | + _cleanup_test_img | ||
46 | + if [ -f "$TEST_DIR/qsd.pid" ]; then | ||
47 | + qsd_pid=$(cat "$TEST_DIR/qsd.pid") | ||
48 | + kill -KILL "$qsd_pid" | ||
49 | + fusermount -u "$TEST_DIR/fuse-export" &>/dev/null | ||
50 | + fi | ||
51 | + rm -f "$TEST_DIR/fuse-export" | ||
52 | } | ||
53 | trap "_cleanup; exit \$status" 0 1 2 3 15 | ||
54 | |||
55 | # get standard environment, filters and checks | ||
56 | . ./common.rc | ||
57 | . ./common.filter | ||
58 | +. ./common.qemu | ||
59 | |||
60 | # This tests qcow2-specific low-level functionality | ||
61 | _supported_fmt qcow2 | ||
62 | @@ -XXX,XX +XXX,XX @@ _supported_os Linux | ||
63 | # files | ||
64 | _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file | ||
65 | |||
66 | +# This test either needs sudo -n losetup or FUSE exports to work | ||
67 | +if sudo -n losetup &>/dev/null; then | ||
68 | + loopdev=true | ||
69 | +else | ||
70 | + loopdev=false | ||
71 | + | ||
72 | + # QSD --export fuse will either yield "Parameter 'id' is missing" | ||
73 | + # or "Invalid parameter 'fuse'", depending on whether there is | ||
74 | + # FUSE support or not. | ||
75 | + error=$($QSD --export fuse 2>&1) | ||
76 | + if [[ $error = *"'fuse'"* ]]; then | ||
77 | + _notrun 'Passwordless sudo for losetup or FUSE support required, but' \ | ||
78 | + 'neither is available' | ||
79 | + fi | ||
80 | +fi | ||
81 | + | ||
82 | echo | ||
83 | echo '=== Repairing an image without any refcount table ===' | ||
84 | echo | ||
85 | @@ -XXX,XX +XXX,XX @@ _make_test_img 64M | ||
86 | poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00" | ||
87 | _check_test_img -r all | ||
88 | |||
89 | +echo | ||
90 | +echo '=== Check rebuilt reftable location ===' | ||
91 | + | ||
92 | +# In an earlier version of the refcount rebuild algorithm, the | ||
93 | +# reftable was generally placed at the image end (unless something was | ||
94 | +# allocated in the area covered by the refblock right before the image | ||
95 | +# file end, then we would try to place the reftable in that refblock). | ||
96 | +# This was later changed so the reftable would be placed in the | ||
97 | +# earliest possible location. Test this. | ||
98 | + | ||
99 | +echo | ||
100 | +echo '--- Does the image size increase? ---' | ||
101 | +echo | ||
102 | + | ||
103 | +# First test: Just create some image, write some data to it, and | ||
104 | +# resize it so there is free space at the end of the image (enough | ||
105 | +# that it spans at least one full refblock, which for cluster_size=512 | ||
106 | +# images, spans 128k). With the old algorithm, the reftable would | ||
107 | +# have then been placed at the end of the image file, but with the new | ||
108 | +# one, it will be put in that free space. | ||
109 | +# We want to check whether the size of the image file increases due to | ||
110 | +# rebuilding the refcount structures (it should not). | ||
111 | + | ||
112 | +_make_test_img -o 'cluster_size=512' 1M | ||
113 | +# Write something | ||
114 | +$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io | ||
115 | + | ||
116 | +# Add free space | ||
117 | +file_len=$(stat -c '%s' "$TEST_IMG") | ||
118 | +truncate -s $((file_len + 256 * 1024)) "$TEST_IMG" | ||
119 | + | ||
120 | +# Corrupt the image by saying the image header was not allocated | ||
121 | +rt_offset=$(peek_file_be "$TEST_IMG" 48 8) | ||
122 | +rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) | ||
123 | +poke_file "$TEST_IMG" $rb_offset "\x00\x00" | ||
124 | + | ||
125 | +# Check whether rebuilding the refcount structures increases the image | ||
126 | +# file size | ||
127 | +file_len=$(stat -c '%s' "$TEST_IMG") | ||
128 | +echo | ||
129 | +# The only leaks there can be are the old refcount structures that are | ||
130 | +# leaked during rebuilding, no need to clutter the output with them | ||
131 | +_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' | ||
132 | +echo | ||
133 | +post_repair_file_len=$(stat -c '%s' "$TEST_IMG") | ||
134 | + | ||
135 | +if [[ $file_len -eq $post_repair_file_len ]]; then | ||
136 | + echo 'OK: Image size did not change' | ||
137 | +else | ||
138 | + echo 'ERROR: Image size differs' \ | ||
139 | + "($file_len before, $post_repair_file_len after)" | ||
140 | +fi | ||
141 | + | ||
142 | +echo | ||
143 | +echo '--- Will the reftable occupy a hole specifically left for it? ---' | ||
144 | +echo | ||
145 | + | ||
146 | +# Note: With cluster_size=512, every refblock covers 128k. | ||
147 | +# The reftable covers 8M per reftable cluster. | ||
148 | + | ||
149 | +# Create an image that requires two reftable clusters (just because | ||
150 | +# this is more interesting than a single-clustered reftable). | ||
151 | +_make_test_img -o 'cluster_size=512' 9M | ||
152 | +$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io | ||
153 | + | ||
154 | +# Writing 8M will have resized the reftable. Unfortunately, doing so | ||
155 | +# will leave holes in the file, so we need to fill them up so we can | ||
156 | +# be sure the whole file is allocated. Do that by writing | ||
157 | +# consecutively smaller chunks starting from 8 MB, until the file | ||
158 | +# length increases even with a chunk size of 512. Then we must have | ||
159 | +# filled all holes. | ||
160 | +ofs=$((8 * 1024 * 1024)) | ||
161 | +block_len=$((16 * 1024)) | ||
162 | +while [[ $block_len -ge 512 ]]; do | ||
163 | + file_len=$(stat -c '%s' "$TEST_IMG") | ||
164 | + while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do | ||
165 | + # Do not include this in the reference output, it does not | ||
166 | + # really matter which qemu-io calls we do here exactly | ||
167 | + $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null | ||
168 | + ofs=$((ofs + block_len)) | ||
169 | + done | ||
170 | + block_len=$((block_len / 2)) | ||
171 | +done | ||
172 | + | ||
173 | +# Fill up to 9M (do not include this in the reference output either, | ||
174 | +# $ofs is random for all we know) | ||
175 | +$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null | ||
176 | + | ||
177 | +# Make space as follows: | ||
178 | +# - For the first refblock: Right at the beginning of the image (this | ||
179 | +# refblock is placed in the first place possible), | ||
180 | +# - For the reftable somewhere soon afterwards, still near the | ||
181 | +# beginning of the image (i.e. covered by the first refblock); the | ||
182 | +# reftable too is placed in the first place possible, but only after | ||
183 | +# all refblocks have been placed | |
184 | +# No space is needed for the other refblocks, because no refblock is | ||
185 | +# put before the space it covers. In this test case, we do not mind | ||
186 | +# if they are placed at the image file's end. | ||
187 | + | ||
188 | +# Before we make that space, we have to find out the host offset of | ||
189 | +# the area that belonged to the two data clusters at guest offset 4k, | ||
190 | +# because we expect the reftable to be placed there, and we will have | ||
191 | +# to verify that it is. | ||
192 | + | ||
193 | +l1_offset=$(peek_file_be "$TEST_IMG" 40 8) | ||
194 | +l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8) | ||
195 | +l2_offset=$((l2_offset & 0x00fffffffffffe00)) | ||
196 | +data_4k_offset=$(peek_file_be "$TEST_IMG" \ | ||
197 | + $((l2_offset + 4096 / 512 * 8)) 8) | ||
198 | +data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00)) | ||
199 | + | ||
200 | +$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io | ||
201 | + | ||
202 | +# Corrupt the image by saying the image header was not allocated | ||
203 | +rt_offset=$(peek_file_be "$TEST_IMG" 48 8) | ||
204 | +rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) | ||
205 | +poke_file "$TEST_IMG" $rb_offset "\x00\x00" | ||
206 | + | ||
207 | +echo | ||
208 | +# The only leaks there can be are the old refcount structures that are | ||
209 | +# leaked during rebuilding, no need to clutter the output with them | ||
210 | +_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' | ||
211 | +echo | ||
212 | + | ||
213 | +# Check whether the reftable was put where we expected | ||
214 | +rt_offset=$(peek_file_be "$TEST_IMG" 48 8) | ||
215 | +if [[ $rt_offset -eq $data_4k_offset ]]; then | ||
216 | + echo 'OK: Reftable is where we expect it' | ||
217 | +else | ||
218 | + echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset" | ||
219 | +fi | ||
220 | + | ||
221 | +echo | ||
222 | +echo '--- Rebuilding refcount structures on block devices ---' | ||
223 | +echo | ||
224 | + | ||
225 | +# A block device cannot really grow, at least not during qemu-img | ||
226 | +# check. As mentioned in the above cases, rebuilding the refcount | ||
227 | +# structure may lead to new refcount structures being written after | ||
228 | +# the end of the image, and in the past that happened even if there | ||
229 | +# was more than sufficient space in the image. Such post-EOF writes | ||
230 | +# will not work on block devices, so test that the new algorithm | ||
231 | +# avoids it. | ||
232 | + | ||
233 | +# If we have passwordless sudo and losetup, we can use those to create | ||
234 | +# a block device. Otherwise, we can resort to qemu's FUSE export to | ||
235 | +# create a file that isn't growable, which effectively tests the same | ||
236 | +# thing. | ||
237 | + | ||
238 | +_cleanup_test_img | ||
239 | +truncate -s $((64 * 1024 * 1024)) "$TEST_IMG" | ||
240 | + | ||
241 | +if $loopdev; then | ||
242 | + export_mp=$(sudo -n losetup --show -f "$TEST_IMG") | ||
243 | + export_mp_driver=host_device | ||
244 | + sudo -n chmod go+rw "$export_mp" | ||
245 | +else | ||
246 | + # Create non-growable FUSE export that is a bit like an empty | ||
247 | + # block device | ||
248 | + export_mp="$TEST_DIR/fuse-export" | ||
249 | + export_mp_driver=file | ||
250 | + touch "$export_mp" | ||
251 | + | ||
252 | + $QSD \ | ||
253 | + --blockdev file,node-name=export-node,filename="$TEST_IMG" \ | ||
254 | + --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ | ||
255 | + --pidfile "$TEST_DIR/qsd.pid" \ | ||
256 | + --daemonize | ||
257 | +fi | ||
258 | + | ||
259 | +# Now create a qcow2 image on the device -- unfortunately, qemu-img | ||
260 | +# create force-creates the file, so we have to resort to the | ||
261 | +# blockdev-create job. | ||
262 | +_launch_qemu \ | ||
263 | + --blockdev $export_mp_driver,node-name=file,filename="$export_mp" | ||
264 | + | ||
265 | +_send_qemu_cmd \ | ||
266 | + $QEMU_HANDLE \ | ||
267 | + '{ "execute": "qmp_capabilities" }' \ | ||
268 | + 'return' | ||
269 | + | ||
270 | +# Small cluster size again, so the image needs multiple refblocks | ||
271 | +_send_qemu_cmd \ | ||
272 | + $QEMU_HANDLE \ | ||
273 | + '{ "execute": "blockdev-create", | ||
274 | + "arguments": { | ||
275 | + "job-id": "create", | ||
276 | + "options": { | ||
277 | + "driver": "qcow2", | ||
278 | + "file": "file", | ||
279 | + "size": '$((64 * 1024 * 1024))', | ||
280 | + "cluster-size": 512 | ||
281 | + } } }' \ | ||
282 | + '"concluded"' | ||
283 | + | ||
284 | +_send_qemu_cmd \ | ||
285 | + $QEMU_HANDLE \ | ||
286 | + '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \ | ||
287 | + 'return' | ||
288 | + | ||
289 | +_send_qemu_cmd \ | ||
290 | + $QEMU_HANDLE \ | ||
291 | + '{ "execute": "quit" }' \ | ||
292 | + 'return' | ||
293 | + | ||
294 | +wait=y _cleanup_qemu | ||
295 | +echo | ||
296 | + | ||
297 | +# Write some data | ||
298 | +$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io | ||
299 | + | ||
300 | +# Corrupt the image by saying the image header was not allocated | ||
301 | +rt_offset=$(peek_file_be "$export_mp" 48 8) | ||
302 | +rb_offset=$(peek_file_be "$export_mp" $rt_offset 8) | ||
303 | +poke_file "$export_mp" $rb_offset "\x00\x00" | ||
304 | + | ||
305 | +# Repairing such a simple case should just work | ||
306 | +# (We used to put the reftable at the end of the image file, which can | ||
307 | +# never work for non-growable devices.) | ||
308 | +echo | ||
309 | +TEST_IMG="$export_mp" _check_test_img -r all \ | ||
310 | + | grep -v '^Repairing cluster.*refcount=1 reference=0' | ||
311 | + | ||
312 | +if $loopdev; then | ||
313 | + sudo -n losetup -d "$export_mp" | ||
314 | +else | ||
315 | + qsd_pid=$(cat "$TEST_DIR/qsd.pid") | ||
316 | + kill -TERM "$qsd_pid" | ||
317 | + # Wait for process to exit (cannot `wait` because the QSD is daemonized) | ||
318 | + while [ -f "$TEST_DIR/qsd.pid" ]; do | ||
319 | + true | ||
320 | + done | ||
321 | +fi | ||
322 | + | ||
323 | # success, all done | ||
324 | echo '*** done' | ||
325 | rm -f $seq.full | ||
326 | diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out | ||
11 | index XXXXXXX..XXXXXXX 100644 | 327 | index XXXXXXX..XXXXXXX 100644 |
12 | --- a/block/io.c | 328 | --- a/tests/qemu-iotests/108.out |
13 | +++ b/block/io.c | 329 | +++ b/tests/qemu-iotests/108.out |
14 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) | 330 | @@ -XXX,XX +XXX,XX @@ The following inconsistencies were found and repaired: |
15 | 331 | 0 leaked clusters | |
16 | static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) | 332 | 1 corruptions |
17 | { | 333 | |
18 | + dst->pdiscard_alignment = MAX(dst->pdiscard_alignment, | 334 | +Double checking the fixed image now... |
19 | + src->pdiscard_alignment); | 335 | +No errors were found on the image. |
20 | dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); | 336 | + |
21 | dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer); | 337 | +=== Check rebuilt reftable location === |
22 | dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer, | 338 | + |
339 | +--- Does the image size increase? --- | ||
340 | + | ||
341 | +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 | ||
342 | +wrote 65536/65536 bytes at offset 0 | ||
343 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
344 | + | ||
345 | +ERROR cluster 0 refcount=0 reference=1 | ||
346 | +Rebuilding refcount structure | ||
347 | +The following inconsistencies were found and repaired: | ||
348 | + | ||
349 | + 0 leaked clusters | ||
350 | + 1 corruptions | ||
351 | + | ||
352 | +Double checking the fixed image now... | ||
353 | +No errors were found on the image. | ||
354 | + | ||
355 | +OK: Image size did not change | ||
356 | + | ||
357 | +--- Will the reftable occupy a hole specifically left for it? --- | ||
358 | + | ||
359 | +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184 | ||
360 | +wrote 8388608/8388608 bytes at offset 0 | ||
361 | +8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
362 | +discard 512/512 bytes at offset 0 | ||
363 | +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
364 | +discard 1024/1024 bytes at offset 4096 | ||
365 | +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
366 | + | ||
367 | +ERROR cluster 0 refcount=0 reference=1 | ||
368 | +Rebuilding refcount structure | ||
369 | +The following inconsistencies were found and repaired: | ||
370 | + | ||
371 | + 0 leaked clusters | ||
372 | + 1 corruptions | ||
373 | + | ||
374 | +Double checking the fixed image now... | ||
375 | +No errors were found on the image. | ||
376 | + | ||
377 | +OK: Reftable is where we expect it | ||
378 | + | ||
379 | +--- Rebuilding refcount structures on block devices --- | ||
380 | + | ||
381 | +{ "execute": "qmp_capabilities" } | ||
382 | +{"return": {}} | ||
383 | +{ "execute": "blockdev-create", | ||
384 | + "arguments": { | ||
385 | + "job-id": "create", | ||
386 | + "options": { | ||
387 | + "driver": "IMGFMT", | ||
388 | + "file": "file", | ||
389 | + "size": 67108864, | ||
390 | + "cluster-size": 512 | ||
391 | + } } } | ||
392 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}} | ||
393 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}} | ||
394 | +{"return": {}} | ||
395 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}} | ||
396 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}} | ||
397 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}} | ||
398 | +{ "execute": "job-dismiss", "arguments": { "id": "create" } } | ||
399 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} | ||
400 | +{"return": {}} | ||
401 | +{ "execute": "quit" } | ||
402 | +{"return": {}} | ||
403 | +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} | ||
404 | + | ||
405 | +wrote 65536/65536 bytes at offset 0 | ||
406 | +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
407 | + | ||
408 | +ERROR cluster 0 refcount=0 reference=1 | ||
409 | +Rebuilding refcount structure | ||
410 | +The following inconsistencies were found and repaired: | ||
411 | + | ||
412 | + 0 leaked clusters | ||
413 | + 1 corruptions | ||
414 | + | ||
415 | Double checking the fixed image now... | ||
416 | No errors were found on the image. | ||
417 | *** done | ||
23 | -- | 418 | -- |
24 | 2.31.1 | 419 | 2.35.1 |
25 | diff view generated by jsdifflib |
1 | It can be difficult to debug issues with BHs in production environments. | 1 | Instead of fprintf()-ing error messages in rebuild_refcount_structure() |
---|---|---|---|
2 | Although BHs can usually be identified by looking up their ->cb() | 2 | and its rebuild_refcounts_write_refblocks() helper, pass them through an |
3 | function pointer, this requires debug information for the program. It is | 3 | Error object to qcow2_check_refcounts() (which will then print it). |
4 | also not possible to print human-readable diagnostics about BHs because | ||
5 | they have no identifier. | ||
6 | 4 | ||
7 | This patch adds a name to each BH. The name is not unique per instance | 5 | Suggested-by: Eric Blake <eblake@redhat.com> |
8 | but differentiates between cb() functions, which is usually enough. It's | 6 | Signed-off-by: Hanna Reitz <hreitz@redhat.com> |
9 | done by changing aio_bh_new() and friends to macros that stringify cb. | 7 | Message-Id: <20220405134652.19278-4-hreitz@redhat.com> |
8 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
9 | --- | ||
10 | block/qcow2-refcount.c | 33 +++++++++++++++++++-------------- | ||
11 | 1 file changed, 19 insertions(+), 14 deletions(-) | ||
10 | 12 | ||
11 | The next patch will use the name field when reporting leaked BHs. | 13 | diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c |
12 | |||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
15 | Message-Id: <20210414200247.917496-2-stefanha@redhat.com> | ||
16 | --- | ||
17 | include/block/aio.h | 31 ++++++++++++++++++++++++++++--- | ||
18 | include/qemu/main-loop.h | 4 +++- | ||
19 | tests/unit/ptimer-test-stubs.c | 2 +- | ||
20 | util/async.c | 9 +++++++-- | ||
21 | util/main-loop.c | 4 ++-- | ||
22 | 5 files changed, 41 insertions(+), 9 deletions(-) | ||
23 | |||
24 | diff --git a/include/block/aio.h b/include/block/aio.h | ||
25 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/include/block/aio.h | 15 | --- a/block/qcow2-refcount.c |
27 | +++ b/include/block/aio.h | 16 | +++ b/block/qcow2-refcount.c |
28 | @@ -XXX,XX +XXX,XX @@ void aio_context_acquire(AioContext *ctx); | 17 | @@ -XXX,XX +XXX,XX @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, |
29 | /* Relinquish ownership of the AioContext. */ | 18 | static int rebuild_refcounts_write_refblocks( |
30 | void aio_context_release(AioContext *ctx); | 19 | BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, |
31 | 20 | int64_t first_cluster, int64_t end_cluster, | |
32 | +/** | 21 | - uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr |
33 | + * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will | 22 | + uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr, |
34 | + * run only once and as soon as possible. | 23 | + Error **errp |
35 | + * | 24 | ) |
36 | + * @name: A human-readable identifier for debugging purposes. | ||
37 | + */ | ||
38 | +void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, | ||
39 | + const char *name); | ||
40 | + | ||
41 | /** | ||
42 | * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run | ||
43 | * only once and as soon as possible. | ||
44 | + * | ||
45 | + * A convenience wrapper for aio_bh_schedule_oneshot_full() that uses cb as the | ||
46 | + * name string. | ||
47 | */ | ||
48 | -void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); | ||
49 | +#define aio_bh_schedule_oneshot(ctx, cb, opaque) \ | ||
50 | + aio_bh_schedule_oneshot_full((ctx), (cb), (opaque), (stringify(cb))) | ||
51 | |||
52 | /** | ||
53 | - * aio_bh_new: Allocate a new bottom half structure. | ||
54 | + * aio_bh_new_full: Allocate a new bottom half structure. | ||
55 | * | ||
56 | * Bottom halves are lightweight callbacks whose invocation is guaranteed | ||
57 | * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure | ||
58 | * is opaque and must be allocated prior to its use. | ||
59 | + * | ||
60 | + * @name: A human-readable identifier for debugging purposes. | ||
61 | */ | ||
62 | -QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); | ||
63 | +QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, | ||
64 | + const char *name); | ||
65 | + | ||
66 | +/** | ||
67 | + * aio_bh_new: Allocate a new bottom half structure | ||
68 | + * | ||
69 | + * A convenience wrapper for aio_bh_new_full() that uses the cb as the name | ||
70 | + * string. | ||
71 | + */ | ||
72 | +#define aio_bh_new(ctx, cb, opaque) \ | ||
73 | + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) | ||
74 | |||
75 | /** | ||
76 | * aio_notify: Force processing of pending events. | ||
77 | diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h | ||
78 | index XXXXXXX..XXXXXXX 100644 | ||
79 | --- a/include/qemu/main-loop.h | ||
80 | +++ b/include/qemu/main-loop.h | ||
81 | @@ -XXX,XX +XXX,XX @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); | ||
82 | |||
83 | void qemu_fd_register(int fd); | ||
84 | |||
85 | -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); | ||
86 | +#define qemu_bh_new(cb, opaque) \ | ||
87 | + qemu_bh_new_full((cb), (opaque), (stringify(cb))) | ||
88 | +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); | ||
89 | void qemu_bh_schedule_idle(QEMUBH *bh); | ||
90 | |||
91 | enum { | ||
92 | diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/tests/unit/ptimer-test-stubs.c | ||
95 | +++ b/tests/unit/ptimer-test-stubs.c | ||
96 | @@ -XXX,XX +XXX,XX @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) | ||
97 | return deadline; | ||
98 | } | ||
99 | |||
100 | -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) | ||
101 | +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) | ||
102 | { | 25 | { |
103 | QEMUBH *bh = g_new(QEMUBH, 1); | 26 | BDRVQcow2State *s = bs->opaque; |
104 | 27 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcounts_write_refblocks( | |
105 | diff --git a/util/async.c b/util/async.c | 28 | nb_clusters, |
106 | index XXXXXXX..XXXXXXX 100644 | 29 | &first_free_cluster); |
107 | --- a/util/async.c | 30 | if (refblock_offset < 0) { |
108 | +++ b/util/async.c | 31 | - fprintf(stderr, "ERROR allocating refblock: %s\n", |
109 | @@ -XXX,XX +XXX,XX @@ enum { | 32 | - strerror(-refblock_offset)); |
110 | 33 | + error_setg_errno(errp, -refblock_offset, | |
111 | struct QEMUBH { | 34 | + "ERROR allocating refblock"); |
112 | AioContext *ctx; | 35 | return refblock_offset; |
113 | + const char *name; | 36 | } |
114 | QEMUBHFunc *cb; | 37 | |
115 | void *opaque; | 38 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcounts_write_refblocks( |
116 | QSLIST_ENTRY(QEMUBH) next; | 39 | on_disk_reftable_entries * |
117 | @@ -XXX,XX +XXX,XX @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) | 40 | REFTABLE_ENTRY_SIZE); |
118 | return bh; | 41 | if (!on_disk_reftable) { |
119 | } | 42 | + error_setg(errp, "ERROR allocating reftable memory"); |
120 | 43 | return -ENOMEM; | |
121 | -void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) | 44 | } |
122 | +void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, | 45 | |
123 | + void *opaque, const char *name) | 46 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcounts_write_refblocks( |
47 | ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, | ||
48 | s->cluster_size, false); | ||
49 | if (ret < 0) { | ||
50 | - fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); | ||
51 | + error_setg_errno(errp, -ret, "ERROR writing refblock"); | ||
52 | return ret; | ||
53 | } | ||
54 | |||
55 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcounts_write_refblocks( | ||
56 | ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, | ||
57 | s->cluster_size); | ||
58 | if (ret < 0) { | ||
59 | - fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); | ||
60 | + error_setg_errno(errp, -ret, "ERROR writing refblock"); | ||
61 | return ret; | ||
62 | } | ||
63 | |||
64 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcounts_write_refblocks( | ||
65 | static int rebuild_refcount_structure(BlockDriverState *bs, | ||
66 | BdrvCheckResult *res, | ||
67 | void **refcount_table, | ||
68 | - int64_t *nb_clusters) | ||
69 | + int64_t *nb_clusters, | ||
70 | + Error **errp) | ||
124 | { | 71 | { |
125 | QEMUBH *bh; | 72 | BDRVQcow2State *s = bs->opaque; |
126 | bh = g_new(QEMUBH, 1); | 73 | int64_t reftable_offset = -1; |
127 | @@ -XXX,XX +XXX,XX @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) | 74 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcount_structure(BlockDriverState *bs, |
128 | .ctx = ctx, | 75 | rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, |
129 | .cb = cb, | 76 | 0, *nb_clusters, |
130 | .opaque = opaque, | 77 | &on_disk_reftable, |
131 | + .name = name, | 78 | - &on_disk_reftable_entries); |
132 | }; | 79 | + &on_disk_reftable_entries, errp); |
133 | aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT); | 80 | if (reftable_size_changed < 0) { |
134 | } | 81 | res->check_errors++; |
135 | 82 | ret = reftable_size_changed; | |
136 | -QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) | 83 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcount_structure(BlockDriverState *bs, |
137 | +QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, | 84 | refcount_table, nb_clusters, |
138 | + const char *name) | 85 | &first_free_cluster); |
139 | { | 86 | if (reftable_offset < 0) { |
140 | QEMUBH *bh; | 87 | - fprintf(stderr, "ERROR allocating reftable: %s\n", |
141 | bh = g_new(QEMUBH, 1); | 88 | - strerror(-reftable_offset)); |
142 | @@ -XXX,XX +XXX,XX @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) | 89 | + error_setg_errno(errp, -reftable_offset, |
143 | .ctx = ctx, | 90 | + "ERROR allocating reftable"); |
144 | .cb = cb, | 91 | res->check_errors++; |
145 | .opaque = opaque, | 92 | ret = reftable_offset; |
146 | + .name = name, | 93 | goto fail; |
147 | }; | 94 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcount_structure(BlockDriverState *bs, |
148 | return bh; | 95 | reftable_start_cluster, |
149 | } | 96 | reftable_end_cluster, |
150 | diff --git a/util/main-loop.c b/util/main-loop.c | 97 | &on_disk_reftable, |
151 | index XXXXXXX..XXXXXXX 100644 | 98 | - &on_disk_reftable_entries); |
152 | --- a/util/main-loop.c | 99 | + &on_disk_reftable_entries, errp); |
153 | +++ b/util/main-loop.c | 100 | if (reftable_size_changed < 0) { |
154 | @@ -XXX,XX +XXX,XX @@ void main_loop_wait(int nonblocking) | 101 | res->check_errors++; |
155 | 102 | ret = reftable_size_changed; | |
156 | /* Functions to operate on the main QEMU AioContext. */ | 103 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcount_structure(BlockDriverState *bs, |
157 | 104 | ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, | |
158 | -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) | 105 | false); |
159 | +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) | 106 | if (ret < 0) { |
160 | { | 107 | - fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); |
161 | - return aio_bh_new(qemu_aio_context, cb, opaque); | 108 | + error_setg_errno(errp, -ret, "ERROR writing reftable"); |
162 | + return aio_bh_new_full(qemu_aio_context, cb, opaque, name); | 109 | goto fail; |
163 | } | 110 | } |
164 | 111 | ||
165 | /* | 112 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcount_structure(BlockDriverState *bs, |
113 | ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, | ||
114 | reftable_length); | ||
115 | if (ret < 0) { | ||
116 | - fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); | ||
117 | + error_setg_errno(errp, -ret, "ERROR writing reftable"); | ||
118 | goto fail; | ||
119 | } | ||
120 | |||
121 | @@ -XXX,XX +XXX,XX @@ static int rebuild_refcount_structure(BlockDriverState *bs, | ||
122 | &reftable_offset_and_clusters, | ||
123 | sizeof(reftable_offset_and_clusters)); | ||
124 | if (ret < 0) { | ||
125 | - fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret)); | ||
126 | + error_setg_errno(errp, -ret, "ERROR setting reftable"); | ||
127 | goto fail; | ||
128 | } | ||
129 | |||
130 | @@ -XXX,XX +XXX,XX @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, | ||
131 | if (rebuild && (fix & BDRV_FIX_ERRORS)) { | ||
132 | BdrvCheckResult old_res = *res; | ||
133 | int fresh_leaks = 0; | ||
134 | + Error *local_err = NULL; | ||
135 | |||
136 | fprintf(stderr, "Rebuilding refcount structure\n"); | ||
137 | ret = rebuild_refcount_structure(bs, res, &refcount_table, | ||
138 | - &nb_clusters); | ||
139 | + &nb_clusters, &local_err); | ||
140 | if (ret < 0) { | ||
141 | + error_report_err(local_err); | ||
142 | goto fail; | ||
143 | } | ||
144 | |||
166 | -- | 145 | -- |
167 | 2.31.1 | 146 | 2.35.1 |
168 | diff view generated by jsdifflib |