:p
atchew
Login
This series introduces new block allocator scheme into unused data blocks inside the image first and only after that extends the file. On top of that naive implementation of discard and write-zeroes (through the discard) is added. There are also a bunch of bugs revealed in the code during the implementation and testing. Signed-off-by: Denis V. Lunev <den@openvz.org> Denis V. Lunev (21): parallels: fix formatting in bdrv_parallels initialization parallels: mark driver as supporting CBT parallels: invent parallels_opts_prealloc() helper to parse prealloc opts parallels: return earler in fail_format branch in parallels_open() parallels: return earlier from parallels_open() function on error parallels: refactor path when we need to re-check image in parallels_open parallels: create mark_used() helper which sets bit in used bitmap tests: ensure that image validation will not cure the corruption parallels: fix broken parallels_check_data_off() parallels: add test which will validate data_off fixes through repair parallels: collect bitmap of used clusters at open tests: fix broken deduplication check in parallels format test tests: test self-cure of parallels image with duplicated clusters parallels: accept multiple clusters in mark_used() parallels: update used bitmap in allocate_cluster parallels: naive implementation of allocate_clusters with used bitmap parallels: improve readability of allocate_clusters parallels: naive implementation of parallels_co_pdiscard tests: extend test 131 to cover availability of the discard operation parallels: naive implementation of parallels_co_pwrite_zeroes tests: extend test 131 to cover availability of the write-zeroes block/parallels.c | 382 ++++++++++++++---- block/parallels.h | 3 + tests/qemu-iotests/131 | 51 +++ tests/qemu-iotests/131.out | 58 +++ tests/qemu-iotests/tests/parallels-checks | 76 +++- tests/qemu-iotests/tests/parallels-checks.out | 65 ++- 6 files changed, 534 insertions(+), 101 deletions(-) -- 2.34.1
Old code is ugly and contains tabulations. There are no functional changes in this patch. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs) } static BlockDriver bdrv_parallels = { - .format_name = "parallels", - .instance_size = sizeof(BDRVParallelsState), - .bdrv_probe = parallels_probe, - .bdrv_open = parallels_open, - .bdrv_close = parallels_close, - .bdrv_child_perm = bdrv_default_perms, - .bdrv_co_block_status = parallels_co_block_status, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_flush_to_os = parallels_co_flush_to_os, - .bdrv_co_readv = parallels_co_readv, - .bdrv_co_writev = parallels_co_writev, - .is_format = true, - .supports_backing = true, - .bdrv_co_create = parallels_co_create, - .bdrv_co_create_opts = parallels_co_create_opts, - .bdrv_co_check = parallels_co_check, - .create_opts = ¶llels_create_opts, + .format_name = "parallels", + .instance_size = sizeof(BDRVParallelsState), + .create_opts = ¶llels_create_opts, + .is_format = true, + .supports_backing = true, + + .bdrv_has_zero_init = bdrv_has_zero_init_1, + + .bdrv_probe = parallels_probe, + .bdrv_open = parallels_open, + .bdrv_close = parallels_close, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_block_status = parallels_co_block_status, + .bdrv_co_flush_to_os = parallels_co_flush_to_os, + .bdrv_co_readv = parallels_co_readv, + .bdrv_co_writev = parallels_co_writev, + .bdrv_co_create = parallels_co_create, + .bdrv_co_create_opts = parallels_co_create_opts, + .bdrv_co_check = parallels_co_check, }; static void bdrv_parallels_init(void) -- 2.34.1
Parallels driver indeed support Parallels Dirty Bitmap Feature in read-only mode. The patch adds bdrv_supports_persistent_dirty_bitmap() callback which always return 1 to indicate that. This will allow to copy CBT from Parallels image with qemu-img. Note: read-write support is signalled through bdrv_co_can_store_new_dirty_bitmap() and is different. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs) error_free(s->migration_blocker); } +static bool parallels_is_support_dirty_bitmaps(BlockDriverState *bs) +{ + return 1; +} + static BlockDriver bdrv_parallels = { .format_name = "parallels", .instance_size = sizeof(BDRVParallelsState), @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = { .supports_backing = true, .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_supports_persistent_dirty_bitmap = parallels_is_support_dirty_bitmaps, .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, -- 2.34.1
Parallels driver indeed support Parallels Dirty Bitmap Feature in read-only mode. The patch adds bdrv_supports_persistent_dirty_bitmap() callback which always return 1 to indicate that. This will allow to copy CBT from Parallels image with qemu-img. Note: read-write support is signalled through bdrv_co_can_store_new_dirty_bitmap() and is different. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs) error_free(s->migration_blocker); } +static bool parallels_is_support_dirty_bitmaps(BlockDriverState *bs) +{ + return 1; +} + static BlockDriver bdrv_parallels = { .format_name = "parallels", .instance_size = sizeof(BDRVParallelsState), @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = { .supports_backing = true, .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_supports_persistent_dirty_bitmap = parallels_is_support_dirty_bitmaps, .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, -- 2.34.1
This patch creates above mentioned helper and moves its usage to the beginning of parallels_open(). This simplifies parallels_open() a bit. The patch also ensures that we store prealloc_size on block driver state always in sectors. This makes code cleaner and avoids wrong opinion at the assignment that the value is in bytes. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 65 +++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_update_header(BlockDriverState *bs) return bdrv_pwrite_sync(bs->file, 0, size, s->header, 0); } + +static int parallels_opts_prealloc(BlockDriverState *bs, QDict *options, + Error **errp) +{ + char *buf; + int64_t bytes; + BDRVParallelsState *s = bs->opaque; + Error *local_err = NULL; + QemuOpts *opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); + if (!opts) { + return -ENOMEM; + } + + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + return -EINVAL; + } + + bytes = qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); + s->prealloc_size = bytes >> BDRV_SECTOR_BITS; + buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); + /* prealloc_mode can be downgraded later during allocate_clusters */ + s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf, + PRL_PREALLOC_MODE_FALLOCATE, + &local_err); + g_free(buf); + if (local_err != NULL) { + error_propagate(errp, local_err); + return -EINVAL; + } + return 0; +} + static int parallels_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, int ret, size, i; int64_t file_nb_sectors, sector; uint32_t data_start; - QemuOpts *opts = NULL; - Error *local_err = NULL; - char *buf; bool data_off_is_correct; + ret = parallels_opts_prealloc(bs, options, errp); + if (ret < 0) { + return ret; + } + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); if (ret < 0) { return ret; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -EFBIG; goto fail; } + s->prealloc_size = MAX(s->tracks, s->prealloc_size); s->cluster_size = s->tracks << BDRV_SECTOR_BITS; s->bat_size = le32_to_cpu(ph.bat_entries); @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->header_size = size; } - opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); - if (!opts) { - goto fail_options; - } - - if (!qemu_opts_absorb_qdict(opts, options, errp)) { - goto fail_options; - } - - s->prealloc_size = - qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); - s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS); - buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); - /* prealloc_mode can be downgraded later during allocate_clusters */ - s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf, - PRL_PREALLOC_MODE_FALLOCATE, - &local_err); - g_free(buf); - if (local_err != NULL) { - error_propagate(errp, local_err); - goto fail_options; - } - if (ph.ext_off) { if (flags & BDRV_O_RDWR) { /* @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, fail_format: error_setg(errp, "Image not in Parallels format"); -fail_options: ret = -EINVAL; fail: /* -- 2.34.1
We do not need to perform any deallocation/cleanup if wrong format is detected. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, fail_format: error_setg(errp, "Image not in Parallels format"); - ret = -EINVAL; + return -EINVAL; + fail: /* * "s" object was allocated by g_malloc0 so we can safely -- 2.34.1
At the beginning of the function we can return immediately until we really allocate s->header. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0); if (ret < 0) { - goto fail; + return ret; } bs->total_sectors = le64_to_cpu(ph.nb_sectors); @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->tracks = le32_to_cpu(ph.tracks); if (s->tracks == 0) { error_setg(errp, "Invalid image: Zero sectors per track"); - ret = -EINVAL; - goto fail; + return -EINVAL; } if (s->tracks > INT32_MAX/513) { error_setg(errp, "Invalid image: Too big cluster"); - ret = -EFBIG; - goto fail; + return -EFBIG; } s->prealloc_size = MAX(s->tracks, s->prealloc_size); s->cluster_size = s->tracks << BDRV_SECTOR_BITS; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->bat_size = le32_to_cpu(ph.bat_entries); if (s->bat_size > INT_MAX / sizeof(uint32_t)) { error_setg(errp, "Catalog too large"); - ret = -EFBIG; - goto fail; + return -EFBIG; } size = bat_entry_off(s->bat_size); s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs)); s->header = qemu_try_blockalign(bs->file->bs, s->header_size); if (s->header == NULL) { - ret = -ENOMEM; - goto fail; + return -ENOMEM; } ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); -- 2.34.1
More conditions follows thus the check should be more scalable. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, int ret, size, i; int64_t file_nb_sectors, sector; uint32_t data_start; - bool data_off_is_correct; + bool need_check = false; ret = parallels_opts_prealloc(bs, options, errp); if (ret < 0) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->bat_bitmap = (uint32_t *)(s->header + 1); if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { - s->header_unclean = true; + need_check = s->header_unclean = true; } - data_off_is_correct = parallels_test_data_off(s, file_nb_sectors, - &data_start); + need_check = need_check || + !parallels_test_data_off(s, file_nb_sectors, &data_start); + s->data_start = data_start; s->data_end = s->data_start; if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->data_end = sector + s->tracks; } } + need_check = need_check || s->data_end > file_nb_sectors; /* * We don't repair the image here if it's opened for checks. Also we don't @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, return 0; } - /* - * Repair the image if it's dirty or - * out-of-image corruption was detected. - */ - if (s->data_end > file_nb_sectors || s->header_unclean - || !data_off_is_correct) { + /* Repair the image if corruption was detected. */ + if (need_check) { BdrvCheckResult res; ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); if (ret < 0) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } } - return 0; fail_format: -- 2.34.1
This functionality is used twice already and next patch will add more code with it. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static void parallels_set_bat_entry(BDRVParallelsState *s, bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1); } +static int mark_used(BlockDriverState *bs, + unsigned long *bitmap, uint32_t bitmap_size, int64_t off) +{ + BDRVParallelsState *s = bs->opaque; + uint32_t cluster_index = host_cluster_index(s, off); + if (cluster_index >= bitmap_size) { + return -E2BIG; + } + if (test_bit(cluster_index, bitmap)) { + return -EBUSY; + } + bitmap_set(bitmap, cluster_index, 1); + return 0; +} + static int64_t coroutine_fn GRAPH_RDLOCK allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) @@ -XXX,XX +XXX,XX @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, BDRVParallelsState *s = bs->opaque; int64_t host_off, host_sector, guest_sector; unsigned long *bitmap; - uint32_t i, bitmap_size, cluster_index, bat_entry; + uint32_t i, bitmap_size, bat_entry; int n, ret = 0; uint64_t *buf = NULL; bool fixed = false; @@ -XXX,XX +XXX,XX @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, continue; } - cluster_index = host_cluster_index(s, host_off); - assert(cluster_index < bitmap_size); - if (!test_bit(cluster_index, bitmap)) { - bitmap_set(bitmap, cluster_index, 1); + ret = mark_used(bs, bitmap, bitmap_size, host_off); + assert(ret != -E2BIG); + if (ret == 0) { continue; } @@ -XXX,XX +XXX,XX @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, * consistent for the new allocated clusters too. * * Note, clusters allocated outside the current image are not - * considered, and the bitmap size doesn't change. + * considered, and the bitmap size doesn't change. This specifically + * means that -E2BIG is OK. */ - cluster_index = host_cluster_index(s, host_off); - if (cluster_index < bitmap_size) { - bitmap_set(bitmap, cluster_index, 1); + ret = mark_used(bs, bitmap, bitmap_size, host_off); + if (ret == -EBUSY) { + res->check_errors++; + goto out_repair_bat; } fixed = true; -- 2.34.1
Since commit cfce1091d55322789582480798a891cbaf66924e Author: Alexander Ivanov <alexander.ivanov@virtuozzo.com> Date: Tue Jul 18 12:44:29 2023 +0200 parallels: Image repairing in parallels_open() there is a potential pit fall with calling qemu-io -c "read" The image is opened in read-write mode and thus could be potentially repaired. This could ruin testing process. The patch forces read-only opening for reads. In that case repairing is impossible. Signed-off-by: Denis V. Lunev <den@openvz.org> --- tests/qemu-iotests/tests/parallels-checks | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -XXX,XX +XXX,XX @@ file_size=`stat --printf="%s" "$TEST_IMG"` echo "file size: $file_size" echo "== check last cluster ==" -{ $QEMU_IO -c "read -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir # Clear image _make_test_img $SIZE @@ -XXX,XX +XXX,XX @@ echo "== write another pattern to second cluster ==" { $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo "== check second cluster ==" -{ $QEMU_IO -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== corrupt image ==" poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00" echo "== check second cluster ==" -{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo "== repair image ==" _check_test_img -r all echo "== check second cluster ==" -{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo "== check first cluster on host ==" printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1` -- 2.34.1
Once we have repaired data_off field in the header we should update s->data_start which is calculated on the base of it. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res, res->corruptions++; if (fix & BDRV_FIX_ERRORS) { s->header->data_off = cpu_to_le32(data_off); + s->data_start = data_off; res->corruptions_fixed++; } -- 2.34.1
We have only check through self-repair and that proven to be not enough. Signed-off-by: Denis V. Lunev <den@openvz.org> --- tests/qemu-iotests/tests/parallels-checks | 17 +++++++++++++++++ tests/qemu-iotests/tests/parallels-checks.out | 18 ++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -XXX,XX +XXX,XX @@ poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff" echo "== check first cluster ==" { $QEMU_IO -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +# Clear image +_make_test_img $SIZE + +echo "== TEST DATA_OFF THROUGH REPAIR ==" + +echo "== write pattern to first cluster ==" +{ $QEMU_IO -c "write -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== spoil data_off field ==" +poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff" + +echo "== repair image ==" +_check_test_img -r all + +echo "== check first cluster ==" +{ $QEMU_IO -r -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/tests/parallels-checks.out +++ b/tests/qemu-iotests/tests/parallels-checks.out @@ -XXX,XX +XXX,XX @@ wrote 1048576/1048576 bytes at offset 0 Repairing data_off field has incorrect value read 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 +== TEST DATA_OFF THROUGH REPAIR == +== write pattern to first cluster == +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== spoil data_off field == +== repair image == +Repairing data_off field has incorrect value +The following inconsistencies were found and repaired: + + 0 leaked clusters + 1 corruptions + +Double checking the fixed image now... +No errors were found on the image. +== check first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done -- 2.34.1
If the operation is failed, we need to check image consistency if the problem is not about memory allocation. Bitmap adjustments in allocate_cluster are not performed yet. They worth to be separate. This was proven useful during debug of this series. Kept as is for future bissecting. It should be specifically noted that used bitmap must be recalculated if data_off has been fixed during image consistency check. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ block/parallels.h | 3 ++ 2 files changed, 76 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int mark_used(BlockDriverState *bs, return 0; } +/* + * Collect used bitmap. The image can contain errors, we should fill the + * bitmap anyway, as much as we can. This information will be used for + * error resolution. + */ +static int parallels_fill_used_bitmap(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + int64_t payload_bytes; + uint32_t i; + int err = 0; + + payload_bytes = bdrv_co_getlength(bs->file->bs); + if (payload_bytes < 0) { + return payload_bytes; + } + payload_bytes -= s->data_start * BDRV_SECTOR_SIZE; + if (payload_bytes < 0) { + return -EINVAL; + } + + s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size); + if (s->used_bmap_size == 0) { + return 0; + } + s->used_bmap = bitmap_try_new(s->used_bmap_size); + if (s->used_bmap == NULL) { + return -ENOMEM; + } + + for (i = 0; i < s->bat_size; i++) { + int err2; + int64_t host_off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (host_off == 0) { + continue; + } + + err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off); + if (err2 < 0 && err == 0) { + err = err2; + } + } + return err; +} + +static void parallels_free_used_bitmap(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + s->used_bmap_size = 0; + g_free(s->used_bmap); +} + static int64_t coroutine_fn GRAPH_RDLOCK allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) @@ -XXX,XX +XXX,XX @@ parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res, res->corruptions++; if (fix & BDRV_FIX_ERRORS) { + int err; s->header->data_off = cpu_to_le32(data_off); s->data_start = data_off; + + parallels_free_used_bitmap(bs); + err = parallels_fill_used_bitmap(bs); + if (err == -ENOMEM) { + res->check_errors++; + return err; + } + res->corruptions_fixed++; } @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } need_check = need_check || s->data_end > file_nb_sectors; + if (!need_check) { + ret = parallels_fill_used_bitmap(bs); + if (ret == -ENOMEM) { + goto fail; + } + need_check = need_check || ret < 0; /* These are correctable errors */ + } + /* * We don't repair the image here if it's opened for checks. Also we don't * want to change inactive images and can't change readonly images. @@ -XXX,XX +XXX,XX @@ fail: * "s" object was allocated by g_malloc0 so we can safely * try to free its fields even they were not allocated. */ + parallels_free_used_bitmap(bs); + error_free(s->migration_blocker); g_free(s->bat_dirty_bmap); qemu_vfree(s->header); @@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs) PREALLOC_MODE_OFF, 0, NULL); } + parallels_free_used_bitmap(bs); + g_free(s->bat_dirty_bmap); qemu_vfree(s->header); diff --git a/block/parallels.h b/block/parallels.h index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.h +++ b/block/parallels.h @@ -XXX,XX +XXX,XX @@ typedef struct BDRVParallelsState { unsigned long *bat_dirty_bmap; unsigned int bat_dirty_block; + unsigned long *used_bmap; + unsigned long used_bmap_size; + uint32_t *bat_bitmap; unsigned int bat_size; -- 2.34.1
If the operation is failed, we need to check image consistency if the problem is not about memory allocation. Bitmap adjustments in allocate_cluster are not performed yet. They worth to be separate. This was proven useful during debug of this series. Kept as is for future bissecting. It should be specifically noted that used bitmap must be recalculated if data_off has been fixed during image consistency check. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ block/parallels.h | 3 ++ 2 files changed, 76 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int mark_used(BlockDriverState *bs, return 0; } +/* + * Collect used bitmap. The image can contain errors, we should fill the + * bitmap anyway, as much as we can. This information will be used for + * error resolution. + */ +static int parallels_fill_used_bitmap(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + int64_t payload_bytes; + uint32_t i; + int err = 0; + + payload_bytes = bdrv_co_getlength(bs->file->bs); + if (payload_bytes < 0) { + return payload_bytes; + } + payload_bytes -= s->data_start * BDRV_SECTOR_SIZE; + if (payload_bytes < 0) { + return -EINVAL; + } + + s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size); + if (s->used_bmap_size == 0) { + return 0; + } + s->used_bmap = bitmap_try_new(s->used_bmap_size); + if (s->used_bmap == NULL) { + return -ENOMEM; + } + + for (i = 0; i < s->bat_size; i++) { + int err2; + int64_t host_off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (host_off == 0) { + continue; + } + + err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off); + if (err2 < 0 && err == 0) { + err = err2; + } + } + return err; +} + +static void parallels_free_used_bitmap(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + s->used_bmap_size = 0; + g_free(s->used_bmap); +} + static int64_t coroutine_fn GRAPH_RDLOCK allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) @@ -XXX,XX +XXX,XX @@ parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res, res->corruptions++; if (fix & BDRV_FIX_ERRORS) { + int err; s->header->data_off = cpu_to_le32(data_off); s->data_start = data_off; + + parallels_free_used_bitmap(bs); + err = parallels_fill_used_bitmap(bs); + if (err == -ENOMEM) { + res->check_errors++; + return err; + } + res->corruptions_fixed++; } @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } need_check = need_check || s->data_end > file_nb_sectors; + if (!need_check) { + ret = parallels_fill_used_bitmap(bs); + if (ret == -ENOMEM) { + goto fail; + } + need_check = need_check || ret < 0; /* These are correctable errors */ + } + /* * We don't repair the image here if it's opened for checks. Also we don't * want to change inactive images and can't change readonly images. @@ -XXX,XX +XXX,XX @@ fail: * "s" object was allocated by g_malloc0 so we can safely * try to free its fields even they were not allocated. */ + parallels_free_used_bitmap(bs); + error_free(s->migration_blocker); g_free(s->bat_dirty_bmap); qemu_vfree(s->header); @@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs) PREALLOC_MODE_OFF, 0, NULL); } + parallels_free_used_bitmap(bs); + g_free(s->bat_dirty_bmap); qemu_vfree(s->header); diff --git a/block/parallels.h b/block/parallels.h index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.h +++ b/block/parallels.h @@ -XXX,XX +XXX,XX @@ typedef struct BDRVParallelsState { unsigned long *bat_dirty_bmap; unsigned int bat_dirty_block; + unsigned long *used_bmap; + unsigned long used_bmap_size; + uint32_t *bat_bitmap; unsigned int bat_size; -- 2.34.1
Original check is broken as supposed reading from 2 different clusters results in read from the same file offset twice. This is definitely wrong. We should be sure that * the content of both clusters is correct after repair * clusters are at the different offsets after repair In order to check the latter we write some content into the first one and validate that fact. Signed-off-by: Denis V. Lunev <den@openvz.org> --- tests/qemu-iotests/tests/parallels-checks | 14 ++++++++++---- tests/qemu-iotests/tests/parallels-checks.out | 16 ++++++++++++---- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -XXX,XX +XXX,XX @@ echo "== check second cluster ==" echo "== repair image ==" _check_test_img -r all +echo "== check the first cluster ==" +{ $QEMU_IO -r -c "read -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== check second cluster ==" { $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir -echo "== check first cluster on host ==" -printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1` +echo "== write another pattern to the first clusters ==" +{ $QEMU_IO -c "write -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the first cluster ==" +{ $QEMU_IO -r -c "read -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir -echo "== check second cluster on host ==" -printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1` +echo "== check the second cluster (deduplicated) ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir # Clear image _make_test_img $SIZE diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/tests/parallels-checks.out +++ b/tests/qemu-iotests/tests/parallels-checks.out @@ -XXX,XX +XXX,XX @@ The following inconsistencies were found and repaired: Double checking the fixed image now... No errors were found on the image. +== check the first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == check second cluster == read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -== check first cluster on host == -content: 0x11 -== check second cluster on host == -content: 0x11 +== write another pattern to the first clusters == +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the second cluster (deduplicated) == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 == TEST DATA_OFF CHECK == == write pattern to first cluster == -- 2.34.1
The test is quite similar with the original one for duplicated clusters. There is the only difference in the operation which should fix the image. Signed-off-by: Denis V. Lunev <den@openvz.org> --- tests/qemu-iotests/tests/parallels-checks | 36 +++++++++++++++++++ tests/qemu-iotests/tests/parallels-checks.out | 31 ++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -XXX,XX +XXX,XX @@ echo "== check the second cluster (deduplicated) ==" # Clear image _make_test_img $SIZE +echo "== TEST DUPLICATION SELF-CURE ==" + +echo "== write pattern to whole image ==" +{ $QEMU_IO -c "write -P 0x11 0 $SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== write another pattern to second cluster ==" +{ $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check second cluster ==" +{ $QEMU_IO -r -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + + +echo "== corrupt image ==" +poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00" + +echo "== check second cluster ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the first cluster with self-repair ==" +{ $QEMU_IO -c "read -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check second cluster ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== write another pattern to the first clusters ==" +{ $QEMU_IO -c "write -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the first cluster ==" +{ $QEMU_IO -r -c "read -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the second cluster (deduplicated) ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +# Clear image +_make_test_img $SIZE + echo "== TEST DATA_OFF CHECK ==" echo "== write pattern to first cluster ==" diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/tests/parallels-checks.out +++ b/tests/qemu-iotests/tests/parallels-checks.out @@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0 read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 +== TEST DUPLICATION SELF-CURE == +== write pattern to whole image == +wrote 4194304/4194304 bytes at offset 0 +4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== write another pattern to second cluster == +wrote 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check second cluster == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== corrupt image == +== check second cluster == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the first cluster with self-repair == +Repairing duplicate offset in BAT entry 1 +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check second cluster == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== write another pattern to the first clusters == +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the second cluster (deduplicated) == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 == TEST DATA_OFF CHECK == == write pattern to first cluster == wrote 1048576/1048576 bytes at offset 0 -- 2.34.1
This would be useful in the next patch in allocate_clusters(). This change would not imply serious performance drawbacks as usually image is full of data or are at the end of the bitmap. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static void parallels_set_bat_entry(BDRVParallelsState *s, bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1); } -static int mark_used(BlockDriverState *bs, - unsigned long *bitmap, uint32_t bitmap_size, int64_t off) +static int mark_used(BlockDriverState *bs, unsigned long *bitmap, + uint32_t bitmap_size, int64_t off, uint32_t count) { BDRVParallelsState *s = bs->opaque; uint32_t cluster_index = host_cluster_index(s, off); - if (cluster_index >= bitmap_size) { + unsigned long next_used; + if (cluster_index + count > bitmap_size) { return -E2BIG; } - if (test_bit(cluster_index, bitmap)) { + next_used = find_next_bit(bitmap, bitmap_size, cluster_index); + if (next_used < cluster_index + count) { return -EBUSY; } - bitmap_set(bitmap, cluster_index, 1); + bitmap_set(bitmap, cluster_index, count); return 0; } @@ -XXX,XX +XXX,XX @@ static int parallels_fill_used_bitmap(BlockDriverState *bs) continue; } - err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off); + err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, 1); if (err2 < 0 && err == 0) { err = err2; } @@ -XXX,XX +XXX,XX @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, continue; } - ret = mark_used(bs, bitmap, bitmap_size, host_off); + ret = mark_used(bs, bitmap, bitmap_size, host_off, 1); assert(ret != -E2BIG); if (ret == 0) { continue; @@ -XXX,XX +XXX,XX @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, * considered, and the bitmap size doesn't change. This specifically * means that -E2BIG is OK. */ - ret = mark_used(bs, bitmap, bitmap_size, host_off); + ret = mark_used(bs, bitmap, bitmap_size, host_off, 1); if (ret == -EBUSY) { res->check_errors++; goto out_repair_bat; -- 2.34.1
We should extend the bitmap ff the file is extended and set the bit in the image used bitmap once the cluster is allocated. Sanity check at that moment also looks like a good idea. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, return len; } if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) { + uint32_t new_usedsize; + space += s->prealloc_size; /* * We require the expanded size to read back as zero. If the @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { return ret; } + + new_usedsize = s->used_bmap_size + + (space << BDRV_SECTOR_BITS) / s->cluster_size; + s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size, + new_usedsize); + s->used_bmap_size = new_usedsize; } /* @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } + ret = mark_used(bs, s->used_bmap, s->used_bmap_size, + s->data_end << BDRV_SECTOR_BITS, to_allocate); + if (ret < 0) { + /* Image consistency is broken. Alarm! */ + return ret; + } for (i = 0; i < to_allocate; i++) { parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier); s->data_end += s->tracks; -- 2.34.1
The access to the bitmap is not optimized completely. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 51 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, { int ret = 0; BDRVParallelsState *s = bs->opaque; - int64_t pos, space, idx, to_allocate, i, len; + int64_t i, pos, idx, to_allocate, first_free, host_off; pos = block_status(s, sector_num, nb_sectors, pnum); if (pos > 0) { @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, */ assert(idx < s->bat_size && idx + to_allocate <= s->bat_size); - space = to_allocate * s->tracks; - len = bdrv_co_getlength(bs->file->bs); - if (len < 0) { - return len; - } - if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) { + first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size); + if (first_free == s->used_bmap_size) { uint32_t new_usedsize; + int64_t space = to_allocate * s->tracks + s->prealloc_size; + + host_off = s->data_end * BDRV_SECTOR_SIZE; - space += s->prealloc_size; /* * We require the expanded size to read back as zero. If the * user permitted truncation, we try that; but if it fails, we @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size, new_usedsize); s->used_bmap_size = new_usedsize; + } else { + int64_t next_used; + next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free); + + /* Not enough continuous clusters in the middle, adjust the size */ + if (next_used - first_free < to_allocate) { + to_allocate = next_used - first_free; + *pnum = (idx + to_allocate) * s->tracks - sector_num; + } + + host_off = s->data_start * BDRV_SECTOR_SIZE; + host_off += first_free * s->cluster_size; + + /* + * No need to preallocate if we are using tail area from the above + * branch. In the other case we are likely re-using hole. Preallocate + * the space if required by the prealloc_mode. + */ + if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE && + host_off < s->data_end * BDRV_SECTOR_SIZE) { + ret = bdrv_co_pwrite_zeroes(bs->file, host_off, + s->cluster_size * to_allocate, 0); + if (ret < 0) { + return ret; + } + } } /* @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } - ret = mark_used(bs, s->used_bmap, s->used_bmap_size, - s->data_end << BDRV_SECTOR_BITS, to_allocate); + ret = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, to_allocate); if (ret < 0) { /* Image consistency is broken. Alarm! */ return ret; } for (i = 0; i < to_allocate; i++) { - parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier); - s->data_end += s->tracks; + parallels_set_bat_entry(s, idx + i, + host_off / BDRV_SECTOR_SIZE / s->off_multiplier); + host_off += s->cluster_size; + } + if (host_off > s->data_end * BDRV_SECTOR_SIZE) { + s->data_end = host_off / BDRV_SECTOR_SIZE; } return bat2sect(s, idx) + sector_num % s->tracks; -- 2.34.1
Replace 'space' representing the amount of data to preallocate with 'bytes'. Rationale: * 'space' at each place is converted to bytes * the unit is more close to the variable name Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size); if (first_free == s->used_bmap_size) { uint32_t new_usedsize; - int64_t space = to_allocate * s->tracks + s->prealloc_size; + int64_t bytes = to_allocate * s->cluster_size; + bytes += s->prealloc_size * BDRV_SECTOR_SIZE; host_off = s->data_end * BDRV_SECTOR_SIZE; @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, * force the safer-but-slower fallocate. */ if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) { - ret = bdrv_co_truncate(bs->file, - (s->data_end + space) << BDRV_SECTOR_BITS, + ret = bdrv_co_truncate(bs->file, host_off + bytes, false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL); if (ret == -ENOTSUP) { @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { - ret = bdrv_co_pwrite_zeroes(bs->file, - s->data_end << BDRV_SECTOR_BITS, - space << BDRV_SECTOR_BITS, 0); + ret = bdrv_co_pwrite_zeroes(bs->file, host_off, bytes, 0); } if (ret < 0) { return ret; } - new_usedsize = s->used_bmap_size + - (space << BDRV_SECTOR_BITS) / s->cluster_size; + new_usedsize = s->used_bmap_size + bytes / s->cluster_size; s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size, new_usedsize); s->used_bmap_size = new_usedsize; -- 2.34.1
* Discarding with backing stores is not supported by the format. * There is no buffering/queueing of the discard operation. * Only operations aligned to the cluster are supported. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, return ret; } + +static int coroutine_fn GRAPH_RDLOCK_PTR +parallels_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + int ret = 0; + uint32_t cluster, count; + BDRVParallelsState *s = bs->opaque; + + /* + * The image does not support ZERO mark inside the BAT, which means that + * stale data could be exposed from the backing file. + */ + if (bs->backing) { + return -ENOTSUP; + } + + if (!QEMU_IS_ALIGNED(offset, s->cluster_size)) { + return -ENOTSUP; + } else if (!QEMU_IS_ALIGNED(bytes, s->cluster_size)) { + return -ENOTSUP; + } + + cluster = offset / s->cluster_size; + count = bytes / s->cluster_size; + + qemu_co_mutex_lock(&s->lock); + for (; count > 0; cluster++, count--) { + int64_t host_off = bat2sect(s, cluster) << BDRV_SECTOR_BITS; + if (host_off == 0) { + continue; + } + + ret = bdrv_co_pdiscard(bs->file, cluster * s->cluster_size, + s->cluster_size); + if (ret < 0) { + goto done; + } + + parallels_set_bat_entry(s, cluster, 0); + bitmap_clear(s->used_bmap, host_cluster_index(s, host_off), 1); + } +done: + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static void parallels_check_unclean(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = { .bdrv_co_create = parallels_co_create, .bdrv_co_create_opts = parallels_co_create_opts, .bdrv_co_check = parallels_co_check, + .bdrv_co_pdiscard = parallels_co_pdiscard, }; static void bdrv_parallels_init(void) -- 2.34.1
This patch contains test which minimally tests discard and new cluster allocation logic. The following checks are added: * write 2 clusters, discard the first allocated * write another cluster, check that the hole is filled * write 2 clusters, discard the first allocated, write 1 cluster at non-aligned to cluster offset (2 new clusters should be allocated) Signed-off-by: Denis V. Lunev <den@openvz.org> --- tests/qemu-iotests/131 | 31 +++++++++++++++++++++++++++++++ tests/qemu-iotests/131.out | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/131 +++ b/tests/qemu-iotests/131 @@ -XXX,XX +XXX,XX @@ poke_file "$TEST_IMG" "$inuse_offset" "\x59\x6e\x6f\x74" echo "== read corrupted image with repairing ==" { $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo "== check discard ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "discard 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check simple allocation over the discarded hole ==" + +{ $QEMU_IO -c "write -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check more complex allocation over the discard hole ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "discard $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +# There is 1 cluster hole. Fill it fully and allocate 1 cluster at the end +{ $QEMU_IO -c "write -P 0x12 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0x12 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 $((CLUSTER_SIZE + CLUSTER_HALF_SIZE)) $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== allocate with backing ==" # Verify that allocating clusters works fine even when there is a backing image. # Regression test for a bug where we would pass a buffer read from the backing diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/131.out +++ b/tests/qemu-iotests/131.out @@ -XXX,XX +XXX,XX @@ read 524288/524288 bytes at offset 0 Repairing image was not closed correctly read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check discard == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 2097152/2097152 bytes at offset 0 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x200000 TEST_DIR/t.IMGFMT +discard 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x100000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check simple allocation over the discarded hole == +wrote 1048576/1048576 bytes at offset 2097152 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x100000 0x100000 TEST_DIR/t.IMGFMT +0x200000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 2097152 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check more complex allocation over the discard hole == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 2097152/2097152 bytes at offset 2097152 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +discard 1048576/1048576 bytes at offset 2097152 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 1048576/1048576 bytes at offset 524288 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x100000 TEST_DIR/t.IMGFMT +0x100000 0x100000 TEST_DIR/t.IMGFMT +0x300000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 524288 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2097152/2097152 bytes at offset 1572864 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == allocate with backing == Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 -- 2.34.1
The zero flag is missed in the Parallels format specification. We can resort to discard if we have no backing file. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ done: return ret; } +static int coroutine_fn GRAPH_RDLOCK +parallels_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, + BdrvRequestFlags flags) +{ + /* + * The zero flag is missed in the Parallels format specification. We can + * resort to discard if we have no backing file (this condition is checked + * inside parallels_co_pdiscard(). + */ + return parallels_co_pdiscard(bs, offset, bytes); +} + + static void parallels_check_unclean(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = { .bdrv_co_create_opts = parallels_co_create_opts, .bdrv_co_check = parallels_co_check, .bdrv_co_pdiscard = parallels_co_pdiscard, + .bdrv_co_pwrite_zeroes = parallels_co_pwrite_zeroes, }; static void bdrv_parallels_init(void) -- 2.34.1
This patch contains test which minimally tests write-zeroes on top of working discard. The following checks are added: * write 2 clusters, write-zero to the first allocated cluster * write 2 cluster, write-zero to the half the first allocated cluster Signed-off-by: Denis V. Lunev <den@openvz.org> --- tests/qemu-iotests/131 | 20 ++++++++++++++++++++ tests/qemu-iotests/131.out | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/131 +++ b/tests/qemu-iotests/131 @@ -XXX,XX +XXX,XX @@ _make_test_img $size { $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir { $QEMU_IO -c "read -P 0 $((CLUSTER_SIZE + CLUSTER_HALF_SIZE)) $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo "== check write-zeroes ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "write -z 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check cluster-partial write-zeroes ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "write -z 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== allocate with backing ==" # Verify that allocating clusters works fine even when there is a backing image. # Regression test for a bug where we would pass a buffer read from the backing diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/131.out +++ b/tests/qemu-iotests/131.out @@ -XXX,XX +XXX,XX @@ read 524288/524288 bytes at offset 0 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 2097152/2097152 bytes at offset 1572864 2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check write-zeroes == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 2097152/2097152 bytes at offset 0 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x100000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check cluster-partial write-zeroes == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == allocate with backing == Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 -- 2.34.1
This series introduces new block allocator scheme into unused data blocks inside the image first and only after that extends the file. On top of that naive implementation of discard and write-zeroes (through the discard) is added. There are also a bunch of bugs revealed in the code during the implementation and testing. Changes from v1: * added #3 - fix memory leak in parallels_open * rewritten #4 (orig #3) - reflecting leak fixed previous patch * fixed warning in #7 (orig #6), thanks Mike! * fixed typo in #16 (was #15) * fixed warning in #19 (was #18) * fixed wrong argument for bdrv_co_pdiscard in #19 (was #18) * improved discard tests in #20 (was #19) Signed-off-by: Denis V. Lunev <den@openvz.org> Denis V. Lunev (22): parallels: fix formatting in bdrv_parallels initialization parallels: mark driver as supporting CBT parallels: fix memory leak in parallels_open() parallels: invent parallels_opts_prealloc() helper to parse prealloc opts parallels: return earler in fail_format branch in parallels_open() parallels: return earlier from parallels_open() function on error parallels: refactor path when we need to re-check image in parallels_open parallels: create mark_used() helper which sets bit in used bitmap tests: ensure that image validation will not cure the corruption parallels: fix broken parallels_check_data_off() parallels: add test which will validate data_off fixes through repair parallels: collect bitmap of used clusters at open tests: fix broken deduplication check in parallels format test tests: test self-cure of parallels image with duplicated clusters parallels: accept multiple clusters in mark_used() parallels: update used bitmap in allocate_cluster parallels: naive implementation of allocate_clusters with used bitmap parallels: improve readability of allocate_clusters parallels: naive implementation of parallels_co_pdiscard tests: extend test 131 to cover availability of the discard operation parallels: naive implementation of parallels_co_pwrite_zeroes tests: extend test 131 to cover availability of the write-zeroes block/parallels.c | 389 ++++++++++++++---- block/parallels.h | 3 + tests/qemu-iotests/131 | 52 +++ tests/qemu-iotests/131.out | 60 +++ tests/qemu-iotests/tests/parallels-checks | 76 +++- tests/qemu-iotests/tests/parallels-checks.out | 65 ++- 6 files changed, 544 insertions(+), 101 deletions(-) -- 2.34.1
Old code is ugly and contains tabulations. There are no functional changes in this patch. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs) } static BlockDriver bdrv_parallels = { - .format_name = "parallels", - .instance_size = sizeof(BDRVParallelsState), - .bdrv_probe = parallels_probe, - .bdrv_open = parallels_open, - .bdrv_close = parallels_close, - .bdrv_child_perm = bdrv_default_perms, - .bdrv_co_block_status = parallels_co_block_status, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_flush_to_os = parallels_co_flush_to_os, - .bdrv_co_readv = parallels_co_readv, - .bdrv_co_writev = parallels_co_writev, - .is_format = true, - .supports_backing = true, - .bdrv_co_create = parallels_co_create, - .bdrv_co_create_opts = parallels_co_create_opts, - .bdrv_co_check = parallels_co_check, - .create_opts = ¶llels_create_opts, + .format_name = "parallels", + .instance_size = sizeof(BDRVParallelsState), + .create_opts = ¶llels_create_opts, + .is_format = true, + .supports_backing = true, + + .bdrv_has_zero_init = bdrv_has_zero_init_1, + + .bdrv_probe = parallels_probe, + .bdrv_open = parallels_open, + .bdrv_close = parallels_close, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_block_status = parallels_co_block_status, + .bdrv_co_flush_to_os = parallels_co_flush_to_os, + .bdrv_co_readv = parallels_co_readv, + .bdrv_co_writev = parallels_co_writev, + .bdrv_co_create = parallels_co_create, + .bdrv_co_create_opts = parallels_co_create_opts, + .bdrv_co_check = parallels_co_check, }; static void bdrv_parallels_init(void) -- 2.34.1
Parallels driver indeed support Parallels Dirty Bitmap Feature in read-only mode. The patch adds bdrv_supports_persistent_dirty_bitmap() callback which always return 1 to indicate that. This will allow to copy CBT from Parallels image with qemu-img. Note: read-write support is signalled through bdrv_co_can_store_new_dirty_bitmap() and is different. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs) error_free(s->migration_blocker); } +static bool parallels_is_support_dirty_bitmaps(BlockDriverState *bs) +{ + return 1; +} + static BlockDriver bdrv_parallels = { .format_name = "parallels", .instance_size = sizeof(BDRVParallelsState), @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = { .supports_backing = true, .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_supports_persistent_dirty_bitmap = parallels_is_support_dirty_bitmaps, .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, -- 2.34.1
This patch contains test which minimally tests write-zeroes on top of working discard. The following checks are added: * write 2 clusters, write-zero to the first allocated cluster * write 2 cluster, write-zero to the half the first allocated cluster Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- tests/qemu-iotests/131 | 20 ++++++++++++++++++++ tests/qemu-iotests/131.out | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/131 +++ b/tests/qemu-iotests/131 @@ -XXX,XX +XXX,XX @@ _make_test_img $size { $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir { $QEMU_IO -c "read -P 0 $((CLUSTER_SIZE + CLUSTER_HALF_SIZE)) $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo "== check write-zeroes ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "write -z 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check cluster-partial write-zeroes ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "write -z 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== allocate with backing ==" # Verify that allocating clusters works fine even when there is a backing image. # Regression test for a bug where we would pass a buffer read from the backing diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/131.out +++ b/tests/qemu-iotests/131.out @@ -XXX,XX +XXX,XX @@ read 524288/524288 bytes at offset 0 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 2097152/2097152 bytes at offset 1572864 2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check write-zeroes == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 2097152/2097152 bytes at offset 0 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x100000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check cluster-partial write-zeroes == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == allocate with backing == Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 -- 2.34.1
We should free opts allocated through qemu_opts_create() at the end. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ fail_format: fail_options: ret = -EINVAL; fail: + qemu_opts_del(opts); /* * "s" object was allocated by g_malloc0 so we can safely * try to free its fields even they were not allocated. -- 2.34.1
This patch creates above mentioned helper and moves its usage to the beginning of parallels_open(). This simplifies parallels_open() a bit. The patch also ensures that we store prealloc_size on block driver state always in sectors. This makes code cleaner and avoids wrong opinion at the assignment that the value is in bytes. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 72 +++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_update_header(BlockDriverState *bs) return bdrv_pwrite_sync(bs->file, 0, size, s->header, 0); } + +static int parallels_opts_prealloc(BlockDriverState *bs, QDict *options, + Error **errp) +{ + int err; + char *buf; + int64_t bytes; + BDRVParallelsState *s = bs->opaque; + Error *local_err = NULL; + QemuOpts *opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); + if (!opts) { + return -ENOMEM; + } + + err = -EINVAL; + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + goto done; + } + + bytes = qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); + s->prealloc_size = bytes >> BDRV_SECTOR_BITS; + buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); + /* prealloc_mode can be downgraded later during allocate_clusters */ + s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf, + PRL_PREALLOC_MODE_FALLOCATE, + &local_err); + g_free(buf); + if (local_err != NULL) { + error_propagate(errp, local_err); + goto done; + } + err = 0; + +done: + qemu_opts_del(opts); + return err; +} + static int parallels_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, int ret, size, i; int64_t file_nb_sectors, sector; uint32_t data_start; - QemuOpts *opts = NULL; - Error *local_err = NULL; - char *buf; bool data_off_is_correct; + ret = parallels_opts_prealloc(bs, options, errp); + if (ret < 0) { + return ret; + } + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); if (ret < 0) { return ret; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -EFBIG; goto fail; } + s->prealloc_size = MAX(s->tracks, s->prealloc_size); s->cluster_size = s->tracks << BDRV_SECTOR_BITS; s->bat_size = le32_to_cpu(ph.bat_entries); @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->header_size = size; } - opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); - if (!opts) { - goto fail_options; - } - - if (!qemu_opts_absorb_qdict(opts, options, errp)) { - goto fail_options; - } - - s->prealloc_size = - qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); - s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS); - buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); - /* prealloc_mode can be downgraded later during allocate_clusters */ - s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf, - PRL_PREALLOC_MODE_FALLOCATE, - &local_err); - g_free(buf); - if (local_err != NULL) { - error_propagate(errp, local_err); - goto fail_options; - } - if (ph.ext_off) { if (flags & BDRV_O_RDWR) { /* @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, fail_format: error_setg(errp, "Image not in Parallels format"); -fail_options: ret = -EINVAL; fail: - qemu_opts_del(opts); /* * "s" object was allocated by g_malloc0 so we can safely * try to free its fields even they were not allocated. -- 2.34.1
We do not need to perform any deallocation/cleanup if wrong format is detected. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, fail_format: error_setg(errp, "Image not in Parallels format"); - ret = -EINVAL; + return -EINVAL; + fail: /* * "s" object was allocated by g_malloc0 so we can safely -- 2.34.1
At the beginning of the function we can return immediately until we really allocate s->header. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0); if (ret < 0) { - goto fail; + return ret; } bs->total_sectors = le64_to_cpu(ph.nb_sectors); @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->tracks = le32_to_cpu(ph.tracks); if (s->tracks == 0) { error_setg(errp, "Invalid image: Zero sectors per track"); - ret = -EINVAL; - goto fail; + return -EINVAL; } if (s->tracks > INT32_MAX/513) { error_setg(errp, "Invalid image: Too big cluster"); - ret = -EFBIG; - goto fail; + return -EFBIG; } s->prealloc_size = MAX(s->tracks, s->prealloc_size); s->cluster_size = s->tracks << BDRV_SECTOR_BITS; @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->bat_size = le32_to_cpu(ph.bat_entries); if (s->bat_size > INT_MAX / sizeof(uint32_t)) { error_setg(errp, "Catalog too large"); - ret = -EFBIG; - goto fail; + return -EFBIG; } size = bat_entry_off(s->bat_size); s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs)); s->header = qemu_try_blockalign(bs->file->bs, s->header_size); if (s->header == NULL) { - ret = -ENOMEM; - goto fail; + return -ENOMEM; } ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); -- 2.34.1
More conditions follows thus the check should be more scalable. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, int ret, size, i; int64_t file_nb_sectors, sector; uint32_t data_start; - bool data_off_is_correct; + bool need_check = false; ret = parallels_opts_prealloc(bs, options, errp); if (ret < 0) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->bat_bitmap = (uint32_t *)(s->header + 1); if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { - s->header_unclean = true; + need_check = s->header_unclean = true; + } + + { + bool ok = parallels_test_data_off(s, file_nb_sectors, &data_start); + need_check = need_check || !ok; } - data_off_is_correct = parallels_test_data_off(s, file_nb_sectors, - &data_start); s->data_start = data_start; s->data_end = s->data_start; if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->data_end = sector + s->tracks; } } + need_check = need_check || s->data_end > file_nb_sectors; /* * We don't repair the image here if it's opened for checks. Also we don't @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, return 0; } - /* - * Repair the image if it's dirty or - * out-of-image corruption was detected. - */ - if (s->data_end > file_nb_sectors || s->header_unclean - || !data_off_is_correct) { + /* Repair the image if corruption was detected. */ + if (need_check) { BdrvCheckResult res; ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); if (ret < 0) { @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } } - return 0; fail_format: -- 2.34.1
This functionality is used twice already and next patch will add more code with it. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static void parallels_set_bat_entry(BDRVParallelsState *s, bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1); } +static int mark_used(BlockDriverState *bs, + unsigned long *bitmap, uint32_t bitmap_size, int64_t off) +{ + BDRVParallelsState *s = bs->opaque; + uint32_t cluster_index = host_cluster_index(s, off); + if (cluster_index >= bitmap_size) { + return -E2BIG; + } + if (test_bit(cluster_index, bitmap)) { + return -EBUSY; + } + bitmap_set(bitmap, cluster_index, 1); + return 0; +} + static int64_t coroutine_fn GRAPH_RDLOCK allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) @@ -XXX,XX +XXX,XX @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, BDRVParallelsState *s = bs->opaque; int64_t host_off, host_sector, guest_sector; unsigned long *bitmap; - uint32_t i, bitmap_size, cluster_index, bat_entry; + uint32_t i, bitmap_size, bat_entry; int n, ret = 0; uint64_t *buf = NULL; bool fixed = false; @@ -XXX,XX +XXX,XX @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, continue; } - cluster_index = host_cluster_index(s, host_off); - assert(cluster_index < bitmap_size); - if (!test_bit(cluster_index, bitmap)) { - bitmap_set(bitmap, cluster_index, 1); + ret = mark_used(bs, bitmap, bitmap_size, host_off); + assert(ret != -E2BIG); + if (ret == 0) { continue; } @@ -XXX,XX +XXX,XX @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, * consistent for the new allocated clusters too. * * Note, clusters allocated outside the current image are not - * considered, and the bitmap size doesn't change. + * considered, and the bitmap size doesn't change. This specifically + * means that -E2BIG is OK. */ - cluster_index = host_cluster_index(s, host_off); - if (cluster_index < bitmap_size) { - bitmap_set(bitmap, cluster_index, 1); + ret = mark_used(bs, bitmap, bitmap_size, host_off); + if (ret == -EBUSY) { + res->check_errors++; + goto out_repair_bat; } fixed = true; -- 2.34.1
Since commit cfce1091d55322789582480798a891cbaf66924e Author: Alexander Ivanov <alexander.ivanov@virtuozzo.com> Date: Tue Jul 18 12:44:29 2023 +0200 parallels: Image repairing in parallels_open() there is a potential pit fall with calling qemu-io -c "read" The image is opened in read-write mode and thus could be potentially repaired. This could ruin testing process. The patch forces read-only opening for reads. In that case repairing is impossible. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- tests/qemu-iotests/tests/parallels-checks | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -XXX,XX +XXX,XX @@ file_size=`stat --printf="%s" "$TEST_IMG"` echo "file size: $file_size" echo "== check last cluster ==" -{ $QEMU_IO -c "read -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir # Clear image _make_test_img $SIZE @@ -XXX,XX +XXX,XX @@ echo "== write another pattern to second cluster ==" { $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo "== check second cluster ==" -{ $QEMU_IO -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== corrupt image ==" poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00" echo "== check second cluster ==" -{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo "== repair image ==" _check_test_img -r all echo "== check second cluster ==" -{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo "== check first cluster on host ==" printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1` -- 2.34.1
Once we have repaired data_off field in the header we should update s->data_start which is calculated on the base of it. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res, res->corruptions++; if (fix & BDRV_FIX_ERRORS) { s->header->data_off = cpu_to_le32(data_off); + s->data_start = data_off; res->corruptions_fixed++; } -- 2.34.1
We have only check through self-repair and that proven to be not enough. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- tests/qemu-iotests/tests/parallels-checks | 17 +++++++++++++++++ tests/qemu-iotests/tests/parallels-checks.out | 18 ++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -XXX,XX +XXX,XX @@ poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff" echo "== check first cluster ==" { $QEMU_IO -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +# Clear image +_make_test_img $SIZE + +echo "== TEST DATA_OFF THROUGH REPAIR ==" + +echo "== write pattern to first cluster ==" +{ $QEMU_IO -c "write -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== spoil data_off field ==" +poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff" + +echo "== repair image ==" +_check_test_img -r all + +echo "== check first cluster ==" +{ $QEMU_IO -r -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/tests/parallels-checks.out +++ b/tests/qemu-iotests/tests/parallels-checks.out @@ -XXX,XX +XXX,XX @@ wrote 1048576/1048576 bytes at offset 0 Repairing data_off field has incorrect value read 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 +== TEST DATA_OFF THROUGH REPAIR == +== write pattern to first cluster == +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== spoil data_off field == +== repair image == +Repairing data_off field has incorrect value +The following inconsistencies were found and repaired: + + 0 leaked clusters + 1 corruptions + +Double checking the fixed image now... +No errors were found on the image. +== check first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done -- 2.34.1
If the operation is failed, we need to check image consistency if the problem is not about memory allocation. Bitmap adjustments in allocate_cluster are not performed yet. They worth to be separate. This was proven useful during debug of this series. Kept as is for future bissecting. It should be specifically noted that used bitmap must be recalculated if data_off has been fixed during image consistency check. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ block/parallels.h | 3 ++ 2 files changed, 76 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static int mark_used(BlockDriverState *bs, return 0; } +/* + * Collect used bitmap. The image can contain errors, we should fill the + * bitmap anyway, as much as we can. This information will be used for + * error resolution. + */ +static int parallels_fill_used_bitmap(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + int64_t payload_bytes; + uint32_t i; + int err = 0; + + payload_bytes = bdrv_co_getlength(bs->file->bs); + if (payload_bytes < 0) { + return payload_bytes; + } + payload_bytes -= s->data_start * BDRV_SECTOR_SIZE; + if (payload_bytes < 0) { + return -EINVAL; + } + + s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size); + if (s->used_bmap_size == 0) { + return 0; + } + s->used_bmap = bitmap_try_new(s->used_bmap_size); + if (s->used_bmap == NULL) { + return -ENOMEM; + } + + for (i = 0; i < s->bat_size; i++) { + int err2; + int64_t host_off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (host_off == 0) { + continue; + } + + err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off); + if (err2 < 0 && err == 0) { + err = err2; + } + } + return err; +} + +static void parallels_free_used_bitmap(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + s->used_bmap_size = 0; + g_free(s->used_bmap); +} + static int64_t coroutine_fn GRAPH_RDLOCK allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) @@ -XXX,XX +XXX,XX @@ parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res, res->corruptions++; if (fix & BDRV_FIX_ERRORS) { + int err; s->header->data_off = cpu_to_le32(data_off); s->data_start = data_off; + + parallels_free_used_bitmap(bs); + err = parallels_fill_used_bitmap(bs); + if (err == -ENOMEM) { + res->check_errors++; + return err; + } + res->corruptions_fixed++; } @@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } need_check = need_check || s->data_end > file_nb_sectors; + if (!need_check) { + ret = parallels_fill_used_bitmap(bs); + if (ret == -ENOMEM) { + goto fail; + } + need_check = need_check || ret < 0; /* These are correctable errors */ + } + /* * We don't repair the image here if it's opened for checks. Also we don't * want to change inactive images and can't change readonly images. @@ -XXX,XX +XXX,XX @@ fail: * "s" object was allocated by g_malloc0 so we can safely * try to free its fields even they were not allocated. */ + parallels_free_used_bitmap(bs); + error_free(s->migration_blocker); g_free(s->bat_dirty_bmap); qemu_vfree(s->header); @@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs) PREALLOC_MODE_OFF, 0, NULL); } + parallels_free_used_bitmap(bs); + g_free(s->bat_dirty_bmap); qemu_vfree(s->header); diff --git a/block/parallels.h b/block/parallels.h index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.h +++ b/block/parallels.h @@ -XXX,XX +XXX,XX @@ typedef struct BDRVParallelsState { unsigned long *bat_dirty_bmap; unsigned int bat_dirty_block; + unsigned long *used_bmap; + unsigned long used_bmap_size; + uint32_t *bat_bitmap; unsigned int bat_size; -- 2.34.1
Original check is broken as supposed reading from 2 different clusters results in read from the same file offset twice. This is definitely wrong. We should be sure that * the content of both clusters is correct after repair * clusters are at the different offsets after repair In order to check the latter we write some content into the first one and validate that fact. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- tests/qemu-iotests/tests/parallels-checks | 14 ++++++++++---- tests/qemu-iotests/tests/parallels-checks.out | 16 ++++++++++++---- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -XXX,XX +XXX,XX @@ echo "== check second cluster ==" echo "== repair image ==" _check_test_img -r all +echo "== check the first cluster ==" +{ $QEMU_IO -r -c "read -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== check second cluster ==" { $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir -echo "== check first cluster on host ==" -printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1` +echo "== write another pattern to the first clusters ==" +{ $QEMU_IO -c "write -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the first cluster ==" +{ $QEMU_IO -r -c "read -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir -echo "== check second cluster on host ==" -printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1` +echo "== check the second cluster (deduplicated) ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir # Clear image _make_test_img $SIZE diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/tests/parallels-checks.out +++ b/tests/qemu-iotests/tests/parallels-checks.out @@ -XXX,XX +XXX,XX @@ The following inconsistencies were found and repaired: Double checking the fixed image now... No errors were found on the image. +== check the first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == check second cluster == read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -== check first cluster on host == -content: 0x11 -== check second cluster on host == -content: 0x11 +== write another pattern to the first clusters == +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the second cluster (deduplicated) == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 == TEST DATA_OFF CHECK == == write pattern to first cluster == -- 2.34.1
The test is quite similar with the original one for duplicated clusters. There is the only difference in the operation which should fix the image. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- tests/qemu-iotests/tests/parallels-checks | 36 +++++++++++++++++++ tests/qemu-iotests/tests/parallels-checks.out | 31 ++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -XXX,XX +XXX,XX @@ echo "== check the second cluster (deduplicated) ==" # Clear image _make_test_img $SIZE +echo "== TEST DUPLICATION SELF-CURE ==" + +echo "== write pattern to whole image ==" +{ $QEMU_IO -c "write -P 0x11 0 $SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== write another pattern to second cluster ==" +{ $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check second cluster ==" +{ $QEMU_IO -r -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + + +echo "== corrupt image ==" +poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00" + +echo "== check second cluster ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the first cluster with self-repair ==" +{ $QEMU_IO -c "read -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check second cluster ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== write another pattern to the first clusters ==" +{ $QEMU_IO -c "write -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the first cluster ==" +{ $QEMU_IO -r -c "read -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the second cluster (deduplicated) ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +# Clear image +_make_test_img $SIZE + echo "== TEST DATA_OFF CHECK ==" echo "== write pattern to first cluster ==" diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/tests/parallels-checks.out +++ b/tests/qemu-iotests/tests/parallels-checks.out @@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0 read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 +== TEST DUPLICATION SELF-CURE == +== write pattern to whole image == +wrote 4194304/4194304 bytes at offset 0 +4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== write another pattern to second cluster == +wrote 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check second cluster == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== corrupt image == +== check second cluster == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the first cluster with self-repair == +Repairing duplicate offset in BAT entry 1 +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check second cluster == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== write another pattern to the first clusters == +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the second cluster (deduplicated) == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 == TEST DATA_OFF CHECK == == write pattern to first cluster == wrote 1048576/1048576 bytes at offset 0 -- 2.34.1
This would be useful in the next patch in allocate_clusters(). This change would not imply serious performance drawbacks as usually image is full of data or are at the end of the bitmap. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ static void parallels_set_bat_entry(BDRVParallelsState *s, bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1); } -static int mark_used(BlockDriverState *bs, - unsigned long *bitmap, uint32_t bitmap_size, int64_t off) +static int mark_used(BlockDriverState *bs, unsigned long *bitmap, + uint32_t bitmap_size, int64_t off, uint32_t count) { BDRVParallelsState *s = bs->opaque; uint32_t cluster_index = host_cluster_index(s, off); - if (cluster_index >= bitmap_size) { + unsigned long next_used; + if (cluster_index + count > bitmap_size) { return -E2BIG; } - if (test_bit(cluster_index, bitmap)) { + next_used = find_next_bit(bitmap, bitmap_size, cluster_index); + if (next_used < cluster_index + count) { return -EBUSY; } - bitmap_set(bitmap, cluster_index, 1); + bitmap_set(bitmap, cluster_index, count); return 0; } @@ -XXX,XX +XXX,XX @@ static int parallels_fill_used_bitmap(BlockDriverState *bs) continue; } - err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off); + err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, 1); if (err2 < 0 && err == 0) { err = err2; } @@ -XXX,XX +XXX,XX @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, continue; } - ret = mark_used(bs, bitmap, bitmap_size, host_off); + ret = mark_used(bs, bitmap, bitmap_size, host_off, 1); assert(ret != -E2BIG); if (ret == 0) { continue; @@ -XXX,XX +XXX,XX @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, * considered, and the bitmap size doesn't change. This specifically * means that -E2BIG is OK. */ - ret = mark_used(bs, bitmap, bitmap_size, host_off); + ret = mark_used(bs, bitmap, bitmap_size, host_off, 1); if (ret == -EBUSY) { res->check_errors++; goto out_repair_bat; -- 2.34.1
We should extend the bitmap if the file is extended and set the bit in the image used bitmap once the cluster is allocated. Sanity check at that moment also looks like a good idea. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, return len; } if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) { + uint32_t new_usedsize; + space += s->prealloc_size; /* * We require the expanded size to read back as zero. If the @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { return ret; } + + new_usedsize = s->used_bmap_size + + (space << BDRV_SECTOR_BITS) / s->cluster_size; + s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size, + new_usedsize); + s->used_bmap_size = new_usedsize; } /* @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } + ret = mark_used(bs, s->used_bmap, s->used_bmap_size, + s->data_end << BDRV_SECTOR_BITS, to_allocate); + if (ret < 0) { + /* Image consistency is broken. Alarm! */ + return ret; + } for (i = 0; i < to_allocate; i++) { parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier); s->data_end += s->tracks; -- 2.34.1
The access to the bitmap is not optimized completely. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 51 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, { int ret = 0; BDRVParallelsState *s = bs->opaque; - int64_t pos, space, idx, to_allocate, i, len; + int64_t i, pos, idx, to_allocate, first_free, host_off; pos = block_status(s, sector_num, nb_sectors, pnum); if (pos > 0) { @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, */ assert(idx < s->bat_size && idx + to_allocate <= s->bat_size); - space = to_allocate * s->tracks; - len = bdrv_co_getlength(bs->file->bs); - if (len < 0) { - return len; - } - if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) { + first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size); + if (first_free == s->used_bmap_size) { uint32_t new_usedsize; + int64_t space = to_allocate * s->tracks + s->prealloc_size; + + host_off = s->data_end * BDRV_SECTOR_SIZE; - space += s->prealloc_size; /* * We require the expanded size to read back as zero. If the * user permitted truncation, we try that; but if it fails, we @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size, new_usedsize); s->used_bmap_size = new_usedsize; + } else { + int64_t next_used; + next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free); + + /* Not enough continuous clusters in the middle, adjust the size */ + if (next_used - first_free < to_allocate) { + to_allocate = next_used - first_free; + *pnum = (idx + to_allocate) * s->tracks - sector_num; + } + + host_off = s->data_start * BDRV_SECTOR_SIZE; + host_off += first_free * s->cluster_size; + + /* + * No need to preallocate if we are using tail area from the above + * branch. In the other case we are likely re-using hole. Preallocate + * the space if required by the prealloc_mode. + */ + if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE && + host_off < s->data_end * BDRV_SECTOR_SIZE) { + ret = bdrv_co_pwrite_zeroes(bs->file, host_off, + s->cluster_size * to_allocate, 0); + if (ret < 0) { + return ret; + } + } } /* @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } - ret = mark_used(bs, s->used_bmap, s->used_bmap_size, - s->data_end << BDRV_SECTOR_BITS, to_allocate); + ret = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, to_allocate); if (ret < 0) { /* Image consistency is broken. Alarm! */ return ret; } for (i = 0; i < to_allocate; i++) { - parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier); - s->data_end += s->tracks; + parallels_set_bat_entry(s, idx + i, + host_off / BDRV_SECTOR_SIZE / s->off_multiplier); + host_off += s->cluster_size; + } + if (host_off > s->data_end * BDRV_SECTOR_SIZE) { + s->data_end = host_off / BDRV_SECTOR_SIZE; } return bat2sect(s, idx) + sector_num % s->tracks; -- 2.34.1
Replace 'space' representing the amount of data to preallocate with 'bytes'. Rationale: * 'space' at each place is converted to bytes * the unit is more close to the variable name Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size); if (first_free == s->used_bmap_size) { uint32_t new_usedsize; - int64_t space = to_allocate * s->tracks + s->prealloc_size; + int64_t bytes = to_allocate * s->cluster_size; + bytes += s->prealloc_size * BDRV_SECTOR_SIZE; host_off = s->data_end * BDRV_SECTOR_SIZE; @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, * force the safer-but-slower fallocate. */ if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) { - ret = bdrv_co_truncate(bs->file, - (s->data_end + space) << BDRV_SECTOR_BITS, + ret = bdrv_co_truncate(bs->file, host_off + bytes, false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL); if (ret == -ENOTSUP) { @@ -XXX,XX +XXX,XX @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { - ret = bdrv_co_pwrite_zeroes(bs->file, - s->data_end << BDRV_SECTOR_BITS, - space << BDRV_SECTOR_BITS, 0); + ret = bdrv_co_pwrite_zeroes(bs->file, host_off, bytes, 0); } if (ret < 0) { return ret; } - new_usedsize = s->used_bmap_size + - (space << BDRV_SECTOR_BITS) / s->cluster_size; + new_usedsize = s->used_bmap_size + bytes / s->cluster_size; s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size, new_usedsize); s->used_bmap_size = new_usedsize; -- 2.34.1
* Discarding with backing stores is not supported by the format. * There is no buffering/queueing of the discard operation. * Only operations aligned to the cluster are supported. Signed-off-by: Denis V. Lunev <den@openvz.org> --- block/parallels.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, return ret; } + +static int coroutine_fn +parallels_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + int ret = 0; + uint32_t cluster, count; + BDRVParallelsState *s = bs->opaque; + + /* + * The image does not support ZERO mark inside the BAT, which means that + * stale data could be exposed from the backing file. + */ + if (bs->backing) { + return -ENOTSUP; + } + + if (!QEMU_IS_ALIGNED(offset, s->cluster_size)) { + return -ENOTSUP; + } else if (!QEMU_IS_ALIGNED(bytes, s->cluster_size)) { + return -ENOTSUP; + } + + cluster = offset / s->cluster_size; + count = bytes / s->cluster_size; + + qemu_co_mutex_lock(&s->lock); + for (; count > 0; cluster++, count--) { + int64_t host_off = bat2sect(s, cluster) << BDRV_SECTOR_BITS; + if (host_off == 0) { + continue; + } + + ret = bdrv_co_pdiscard(bs->file, host_off, s->cluster_size); + if (ret < 0) { + goto done; + } + + parallels_set_bat_entry(s, cluster, 0); + bitmap_clear(s->used_bmap, host_cluster_index(s, host_off), 1); + } +done: + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static void parallels_check_unclean(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = { .bdrv_co_create = parallels_co_create, .bdrv_co_create_opts = parallels_co_create_opts, .bdrv_co_check = parallels_co_check, + .bdrv_co_pdiscard = parallels_co_pdiscard, }; static void bdrv_parallels_init(void) -- 2.34.1
This patch contains test which minimally tests discard and new cluster allocation logic. The following checks are added: * write 2 clusters, discard the first allocated * write another cluster, check that the hole is filled * write 2 clusters, discard the first allocated, write 1 cluster at non-aligned to cluster offset (2 new clusters should be allocated) Signed-off-by: Denis V. Lunev <den@openvz.org> --- tests/qemu-iotests/131 | 31 +++++++++++++++++++++++++++++++ tests/qemu-iotests/131.out | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/131 +++ b/tests/qemu-iotests/131 @@ -XXX,XX +XXX,XX @@ poke_file "$TEST_IMG" "$inuse_offset" "\x59\x6e\x6f\x74" echo "== read corrupted image with repairing ==" { $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo "== check discard ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "discard 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check simple allocation over the discarded hole ==" + +{ $QEMU_IO -c "write -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check more complex allocation over the discard hole ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "discard $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +# There is 1 cluster hole. Fill it fully and allocate 1 cluster at the end +{ $QEMU_IO -c "write -P 0x12 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0x12 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 $((CLUSTER_SIZE + CLUSTER_HALF_SIZE)) $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== allocate with backing ==" # Verify that allocating clusters works fine even when there is a backing image. # Regression test for a bug where we would pass a buffer read from the backing diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/131.out +++ b/tests/qemu-iotests/131.out @@ -XXX,XX +XXX,XX @@ read 524288/524288 bytes at offset 0 Repairing image was not closed correctly read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check discard == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 2097152/2097152 bytes at offset 0 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x200000 TEST_DIR/t.IMGFMT +discard 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x100000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check simple allocation over the discarded hole == +wrote 1048576/1048576 bytes at offset 2097152 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x100000 0x100000 TEST_DIR/t.IMGFMT +0x200000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 2097152 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check more complex allocation over the discard hole == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 2097152/2097152 bytes at offset 2097152 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +discard 1048576/1048576 bytes at offset 2097152 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 1048576/1048576 bytes at offset 524288 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x100000 TEST_DIR/t.IMGFMT +0x100000 0x100000 TEST_DIR/t.IMGFMT +0x300000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 524288 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 1572864 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == allocate with backing == Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 -- 2.34.1
The zero flag is missed in the Parallels format specification. We can resort to discard if we have no backing file. Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- block/parallels.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index XXXXXXX..XXXXXXX 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -XXX,XX +XXX,XX @@ done: return ret; } +static int coroutine_fn +parallels_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, + BdrvRequestFlags flags) +{ + /* + * The zero flag is missed in the Parallels format specification. We can + * resort to discard if we have no backing file (this condition is checked + * inside parallels_co_pdiscard(). + */ + return parallels_co_pdiscard(bs, offset, bytes); +} + + static void parallels_check_unclean(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = { .bdrv_co_create_opts = parallels_co_create_opts, .bdrv_co_check = parallels_co_check, .bdrv_co_pdiscard = parallels_co_pdiscard, + .bdrv_co_pwrite_zeroes = parallels_co_pwrite_zeroes, }; static void bdrv_parallels_init(void) -- 2.34.1
This patch contains test which minimally tests write-zeroes on top of working discard. The following checks are added: * write 2 clusters, write-zero to the first allocated cluster * write 2 cluster, write-zero to the half the first allocated cluster Signed-off-by: Denis V. Lunev <den@openvz.org> Reviewed-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com> --- tests/qemu-iotests/131 | 21 +++++++++++++++++++++ tests/qemu-iotests/131.out | 22 ++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/131 +++ b/tests/qemu-iotests/131 @@ -XXX,XX +XXX,XX @@ _make_test_img $size { $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir { $QEMU_IO -c "read -P 0 $((CLUSTER_SIZE + CLUSTER_HALF_SIZE)) $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo "== check write-zeroes ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "write -z 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check cluster-partial write-zeroes ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "write -z 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== allocate with backing ==" # Verify that allocating clusters works fine even when there is a backing image. # Regression test for a bug where we would pass a buffer read from the backing diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/131.out +++ b/tests/qemu-iotests/131.out @@ -XXX,XX +XXX,XX @@ read 524288/524288 bytes at offset 0 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 524288/524288 bytes at offset 1572864 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check write-zeroes == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 2097152/2097152 bytes at offset 0 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x200000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check cluster-partial write-zeroes == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == allocate with backing == Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 -- 2.34.1