1 | The following changes since commit fa54abb8c298f892639ffc4bc2f61448ac3be4a1: | 1 | The following changes since commit 36f87b4513373b3cd79c87c9197d17face95d4ac: |
---|---|---|---|
2 | 2 | ||
3 | Drop QEMU_GNUC_PREREQ() checks for gcc older than 4.1 (2017-04-20 18:33:33 +0100) | 3 | Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.10-20170630' into staging (2017-06-30 11:58:49 +0100) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the git repository at: |
6 | 6 | ||
7 | git://github.com/stefanha/qemu.git tags/block-pull-request | 7 | git://github.com/famz/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 3ccc0a0163b932fe980dce8d26db4bf98b1900e9: | 9 | for you to fetch changes up to c61e684e44272f2acb2bef34cf2aa234582a73a9: |
10 | 10 | ||
11 | MAINTAINERS: update my email address (2017-04-21 10:36:12 +0100) | 11 | block: Exploit BDRV_BLOCK_EOF for larger zero blocks (2017-06-30 21:48:06 +0800) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | 14 | ||
15 | Hi Peter, | ||
16 | |||
17 | Here are Eric Blake's enhancements to the block layer API. Thanks! | ||
18 | |||
15 | ---------------------------------------------------------------- | 19 | ---------------------------------------------------------------- |
16 | 20 | ||
17 | Changlong Xie (1): | 21 | Eric Blake (2): |
18 | MAINTAINERS: update Wen's email address | 22 | block: Add BDRV_BLOCK_EOF to bdrv_get_block_status() |
23 | block: Exploit BDRV_BLOCK_EOF for larger zero blocks | ||
19 | 24 | ||
20 | Lidong Chen (1): | 25 | block/io.c | 42 +++++++++++++++++++++++++++++++++--------- |
21 | migration/block: use blk_pwrite_zeroes for each zero cluster | 26 | include/block/block.h | 2 ++ |
22 | 27 | tests/qemu-iotests/154 | 4 ---- | |
23 | Stefan Hajnoczi (3): | 28 | tests/qemu-iotests/154.out | 12 ++++++------ |
24 | qemu-options: explain disk I/O throttling options | 29 | 4 files changed, 41 insertions(+), 19 deletions(-) |
25 | throttle: do not use invalid config in test | ||
26 | throttle: make throttle_config(throttle_get_config()) symmetric | ||
27 | |||
28 | Zhang Chen (1): | ||
29 | MAINTAINERS: update my email address | ||
30 | |||
31 | MAINTAINERS | 4 ++-- | ||
32 | migration/block.c | 35 +++++++++++++++++++++++++++++++++-- | ||
33 | tests/test-throttle.c | 8 ++++---- | ||
34 | util/throttle.c | 14 ++++++++++++++ | ||
35 | qemu-options.hx | 24 ++++++++++++++++++++++++ | ||
36 | 5 files changed, 77 insertions(+), 8 deletions(-) | ||
37 | 30 | ||
38 | -- | 31 | -- |
39 | 2.9.3 | 32 | 2.9.4 |
40 | 33 | ||
41 | 34 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The disk I/O throttling options have been listed for a long time but | ||
2 | never explained on the QEMU man page. | ||
3 | 1 | ||
4 | Suggested-by: Nini Gu <ngu@redhat.com> | ||
5 | Cc: Alberto Garcia <berto@igalia.com> | ||
6 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
8 | Reviewed-by: Greg Kurz <groug@kaod.org> | ||
9 | Message-id: 20170301115026.22621-2-stefanha@redhat.com | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | --- | ||
12 | qemu-options.hx | 24 ++++++++++++++++++++++++ | ||
13 | 1 file changed, 24 insertions(+) | ||
14 | |||
15 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/qemu-options.hx | ||
18 | +++ b/qemu-options.hx | ||
19 | @@ -XXX,XX +XXX,XX @@ file sectors into the image file. | ||
20 | conversion of plain zero writes by the OS to driver specific optimized | ||
21 | zero write commands. You may even choose "unmap" if @var{discard} is set | ||
22 | to "unmap" to allow a zero write to be converted to an UNMAP operation. | ||
23 | +@item bps=@var{b},bps_rd=@var{r},bps_wr=@var{w} | ||
24 | +Specify bandwidth throttling limits in bytes per second, either for all request | ||
25 | +types or for reads or writes only. Small values can lead to timeouts or hangs | ||
26 | +inside the guest. A safe minimum for disks is 2 MB/s. | ||
27 | +@item bps_max=@var{bm},bps_rd_max=@var{rm},bps_wr_max=@var{wm} | ||
28 | +Specify bursts in bytes per second, either for all request types or for reads | ||
29 | +or writes only. Bursts allow the guest I/O to spike above the limit | ||
30 | +temporarily. | ||
31 | +@item iops=@var{i},iops_rd=@var{r},iops_wr=@var{w} | ||
32 | +Specify request rate limits in requests per second, either for all request | ||
33 | +types or for reads or writes only. | ||
34 | +@item iops_max=@var{bm},iops_rd_max=@var{rm},iops_wr_max=@var{wm} | ||
35 | +Specify bursts in requests per second, either for all request types or for reads | ||
36 | +or writes only. Bursts allow the guest I/O to spike above the limit | ||
37 | +temporarily. | ||
38 | +@item iops_size=@var{is} | ||
39 | +Let every @var{is} bytes of a request count as a new request for iops | ||
40 | +throttling purposes. Use this option to prevent guests from circumventing iops | ||
41 | +limits by sending fewer but larger requests. | ||
42 | +@item group=@var{g} | ||
43 | +Join a throttling quota group with given name @var{g}. All drives that are | ||
44 | +members of the same group are accounted for together. Use this option to | ||
45 | +prevent guests from circumventing throttling limits by using many small disks | ||
46 | +instead of a single larger disk. | ||
47 | @end table | ||
48 | |||
49 | By default, the @option{cache=writeback} mode is used. It will report data | ||
50 | -- | ||
51 | 2.9.3 | ||
52 | |||
53 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | The (burst) max parameter cannot be smaller than the avg parameter. | ||
2 | There is a test case that uses avg = 56, max = 1 and gets away with it | ||
3 | because no input validation is performed by the test case. | ||
4 | 1 | ||
5 | This patch switches to valid test input parameters. | ||
6 | |||
7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
9 | Message-id: 20170301115026.22621-3-stefanha@redhat.com | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | --- | ||
12 | tests/test-throttle.c | 8 ++++---- | ||
13 | 1 file changed, 4 insertions(+), 4 deletions(-) | ||
14 | |||
15 | diff --git a/tests/test-throttle.c b/tests/test-throttle.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/tests/test-throttle.c | ||
18 | +++ b/tests/test-throttle.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static void test_config_functions(void) | ||
20 | orig_cfg.buckets[THROTTLE_OPS_READ].avg = 69; | ||
21 | orig_cfg.buckets[THROTTLE_OPS_WRITE].avg = 23; | ||
22 | |||
23 | - orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; /* should be corrected */ | ||
24 | - orig_cfg.buckets[THROTTLE_BPS_READ].max = 1; /* should not be corrected */ | ||
25 | + orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; /* should be corrected */ | ||
26 | + orig_cfg.buckets[THROTTLE_BPS_READ].max = 56; /* should not be corrected */ | ||
27 | orig_cfg.buckets[THROTTLE_BPS_WRITE].max = 120; | ||
28 | |||
29 | orig_cfg.buckets[THROTTLE_OPS_TOTAL].max = 150; | ||
30 | @@ -XXX,XX +XXX,XX @@ static void test_config_functions(void) | ||
31 | g_assert(final_cfg.buckets[THROTTLE_OPS_READ].avg == 69); | ||
32 | g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].avg == 23); | ||
33 | |||
34 | - g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 15.3);/* fixed */ | ||
35 | - g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max == 1); /* not fixed */ | ||
36 | + g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 15.3); /* fixed */ | ||
37 | + g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max == 56); /* not fixed */ | ||
38 | g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].max == 120); | ||
39 | |||
40 | g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].max == 150); | ||
41 | -- | ||
42 | 2.9.3 | ||
43 | |||
44 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | Throttling has a weird property that throttle_get_config() does not | ||
2 | always return the same throttling settings that were given with | ||
3 | throttle_config(). In other words, the set and get functions aren't | ||
4 | symmetric. | ||
5 | 1 | ||
6 | If .max is 0 then the throttling code assigns a default value of .avg / | ||
7 | 10 in throttle_config(). This is an implementation detail of the | ||
8 | throttling algorithm. When throttle_get_config() is called the .max | ||
9 | value returned should still be 0. | ||
10 | |||
11 | Users are exposed to this quirk via "info block" or "query-block" | ||
12 | monitor commands. This has caused confusion because it looks like a bug | ||
13 | when an unexpected value is reported. | ||
14 | |||
15 | This patch hides the .max value adjustment in throttle_get_config() and | ||
16 | updates test-throttle.c appropriately. | ||
17 | |||
18 | Reported-by: Nini Gu <ngu@redhat.com> | ||
19 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
20 | Reviewed-by: Alberto Garcia <berto@igalia.com> | ||
21 | Message-id: 20170301115026.22621-4-stefanha@redhat.com | ||
22 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
23 | --- | ||
24 | tests/test-throttle.c | 8 ++++---- | ||
25 | util/throttle.c | 14 ++++++++++++++ | ||
26 | 2 files changed, 18 insertions(+), 4 deletions(-) | ||
27 | |||
28 | diff --git a/tests/test-throttle.c b/tests/test-throttle.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/tests/test-throttle.c | ||
31 | +++ b/tests/test-throttle.c | ||
32 | @@ -XXX,XX +XXX,XX @@ static void test_config_functions(void) | ||
33 | orig_cfg.buckets[THROTTLE_OPS_READ].avg = 69; | ||
34 | orig_cfg.buckets[THROTTLE_OPS_WRITE].avg = 23; | ||
35 | |||
36 | - orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; /* should be corrected */ | ||
37 | - orig_cfg.buckets[THROTTLE_BPS_READ].max = 56; /* should not be corrected */ | ||
38 | + orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; | ||
39 | + orig_cfg.buckets[THROTTLE_BPS_READ].max = 56; | ||
40 | orig_cfg.buckets[THROTTLE_BPS_WRITE].max = 120; | ||
41 | |||
42 | orig_cfg.buckets[THROTTLE_OPS_TOTAL].max = 150; | ||
43 | @@ -XXX,XX +XXX,XX @@ static void test_config_functions(void) | ||
44 | g_assert(final_cfg.buckets[THROTTLE_OPS_READ].avg == 69); | ||
45 | g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].avg == 23); | ||
46 | |||
47 | - g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 15.3); /* fixed */ | ||
48 | - g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max == 56); /* not fixed */ | ||
49 | + g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 0); | ||
50 | + g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max == 56); | ||
51 | g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].max == 120); | ||
52 | |||
53 | g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].max == 150); | ||
54 | diff --git a/util/throttle.c b/util/throttle.c | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/util/throttle.c | ||
57 | +++ b/util/throttle.c | ||
58 | @@ -XXX,XX +XXX,XX @@ static void throttle_fix_bucket(LeakyBucket *bkt) | ||
59 | } | ||
60 | } | ||
61 | |||
62 | +/* undo internal bucket parameter changes (see throttle_fix_bucket()) */ | ||
63 | +static void throttle_unfix_bucket(LeakyBucket *bkt) | ||
64 | +{ | ||
65 | + if (bkt->max < bkt->avg) { | ||
66 | + bkt->max = 0; | ||
67 | + } | ||
68 | +} | ||
69 | + | ||
70 | /* take care of canceling a timer */ | ||
71 | static void throttle_cancel_timer(QEMUTimer *timer) | ||
72 | { | ||
73 | @@ -XXX,XX +XXX,XX @@ void throttle_config(ThrottleState *ts, | ||
74 | */ | ||
75 | void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) | ||
76 | { | ||
77 | + int i; | ||
78 | + | ||
79 | *cfg = ts->cfg; | ||
80 | + | ||
81 | + for (i = 0; i < BUCKETS_COUNT; i++) { | ||
82 | + throttle_unfix_bucket(&cfg->buckets[i]); | ||
83 | + } | ||
84 | } | ||
85 | |||
86 | |||
87 | -- | ||
88 | 2.9.3 | ||
89 | |||
90 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Lidong Chen <lidongchen@tencent.com> | ||
2 | 1 | ||
3 | BLOCK_SIZE is (1 << 20), qcow2 cluster size is 65536 by default, | ||
4 | this may cause the qcow2 file size to be bigger after migration. | ||
5 | This patch checks each cluster, using blk_pwrite_zeroes for each | ||
6 | zero cluster. | ||
7 | |||
8 | [Initialize cluster_size to BLOCK_SIZE to prevent a gcc uninitialized | ||
9 | variable compiler warning. In reality we always initialize cluster_size | ||
10 | in a conditional but gcc doesn't know that. | ||
11 | --Stefan] | ||
12 | |||
13 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | Signed-off-by: Lidong Chen <lidongchen@tencent.com> | ||
15 | Message-id: 1492050868-16200-1-git-send-email-lidongchen@tencent.com | ||
16 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
17 | --- | ||
18 | migration/block.c | 35 +++++++++++++++++++++++++++++++++-- | ||
19 | 1 file changed, 33 insertions(+), 2 deletions(-) | ||
20 | |||
21 | diff --git a/migration/block.c b/migration/block.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/migration/block.c | ||
24 | +++ b/migration/block.c | ||
25 | @@ -XXX,XX +XXX,XX @@ static int block_load(QEMUFile *f, void *opaque, int version_id) | ||
26 | int64_t total_sectors = 0; | ||
27 | int nr_sectors; | ||
28 | int ret; | ||
29 | + BlockDriverInfo bdi; | ||
30 | + int cluster_size = BLOCK_SIZE; | ||
31 | |||
32 | do { | ||
33 | addr = qemu_get_be64(f); | ||
34 | @@ -XXX,XX +XXX,XX @@ static int block_load(QEMUFile *f, void *opaque, int version_id) | ||
35 | error_report_err(local_err); | ||
36 | return -EINVAL; | ||
37 | } | ||
38 | + | ||
39 | + ret = bdrv_get_info(blk_bs(blk), &bdi); | ||
40 | + if (ret == 0 && bdi.cluster_size > 0 && | ||
41 | + bdi.cluster_size <= BLOCK_SIZE && | ||
42 | + BLOCK_SIZE % bdi.cluster_size == 0) { | ||
43 | + cluster_size = bdi.cluster_size; | ||
44 | + } else { | ||
45 | + cluster_size = BLOCK_SIZE; | ||
46 | + } | ||
47 | } | ||
48 | |||
49 | if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) { | ||
50 | @@ -XXX,XX +XXX,XX @@ static int block_load(QEMUFile *f, void *opaque, int version_id) | ||
51 | nr_sectors * BDRV_SECTOR_SIZE, | ||
52 | BDRV_REQ_MAY_UNMAP); | ||
53 | } else { | ||
54 | + int i; | ||
55 | + int64_t cur_addr; | ||
56 | + uint8_t *cur_buf; | ||
57 | + | ||
58 | buf = g_malloc(BLOCK_SIZE); | ||
59 | qemu_get_buffer(f, buf, BLOCK_SIZE); | ||
60 | - ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf, | ||
61 | - nr_sectors * BDRV_SECTOR_SIZE, 0); | ||
62 | + for (i = 0; i < BLOCK_SIZE / cluster_size; i++) { | ||
63 | + cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size; | ||
64 | + cur_buf = buf + i * cluster_size; | ||
65 | + | ||
66 | + if ((!block_mig_state.zero_blocks || | ||
67 | + cluster_size < BLOCK_SIZE) && | ||
68 | + buffer_is_zero(cur_buf, cluster_size)) { | ||
69 | + ret = blk_pwrite_zeroes(blk, cur_addr, | ||
70 | + cluster_size, | ||
71 | + BDRV_REQ_MAY_UNMAP); | ||
72 | + } else { | ||
73 | + ret = blk_pwrite(blk, cur_addr, cur_buf, | ||
74 | + cluster_size, 0); | ||
75 | + } | ||
76 | + if (ret < 0) { | ||
77 | + break; | ||
78 | + } | ||
79 | + } | ||
80 | g_free(buf); | ||
81 | } | ||
82 | |||
83 | -- | ||
84 | 2.9.3 | ||
85 | |||
86 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eric Blake <eblake@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | I'm leaving my job at Fujitsu, this email address will stop working | 3 | Just as the block layer already sets BDRV_BLOCK_ALLOCATED as a |
4 | this week. Update it to one that I will have access to later. | 4 | shortcut for subsequent operations, there are also some optimizations |
5 | that are made easier if we can quickly tell that *pnum will advance | ||
6 | us to the end of a file, via a new BDRV_BLOCK_EOF which gets set | ||
7 | by the block layer. | ||
5 | 8 | ||
6 | Signed-off-by: Xie Changlong <xiecl.fnst@cn.fujitsu.com> | 9 | This just plumbs up the new bit; subsequent patches will make use |
7 | Message-id: 1492758767-19716-1-git-send-email-xiecl.fnst@cn.fujitsu.com | 10 | of it. |
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 11 | |
12 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
13 | Message-Id: <20170505021500.19315-2-eblake@redhat.com> | ||
14 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
15 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
9 | --- | 16 | --- |
10 | MAINTAINERS | 2 +- | 17 | block/io.c | 15 +++++++++++---- |
11 | 1 file changed, 1 insertion(+), 1 deletion(-) | 18 | include/block/block.h | 2 ++ |
19 | 2 files changed, 13 insertions(+), 4 deletions(-) | ||
12 | 20 | ||
13 | diff --git a/MAINTAINERS b/MAINTAINERS | 21 | diff --git a/block/io.c b/block/io.c |
14 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/MAINTAINERS | 23 | --- a/block/io.c |
16 | +++ b/MAINTAINERS | 24 | +++ b/block/io.c |
17 | @@ -XXX,XX +XXX,XX @@ F: tests/image-fuzzer/ | 25 | @@ -XXX,XX +XXX,XX @@ typedef struct BdrvCoGetBlockStatusData { |
18 | 26 | * Drivers not implementing the functionality are assumed to not support | |
19 | Replication | 27 | * backing files, hence all their sectors are reported as allocated. |
20 | M: Wen Congyang <wencongyang2@huawei.com> | 28 | * |
21 | -M: Changlong Xie <xiecl.fnst@cn.fujitsu.com> | 29 | - * If 'sector_num' is beyond the end of the disk image the return value is 0 |
22 | +M: Xie Changlong <xiechanglong.d@gmail.com> | 30 | - * and 'pnum' is set to 0. |
23 | S: Supported | 31 | + * If 'sector_num' is beyond the end of the disk image the return value is |
24 | F: replication* | 32 | + * BDRV_BLOCK_EOF and 'pnum' is set to 0. |
25 | F: block/replication.c | 33 | * |
34 | * 'pnum' is set to the number of sectors (including and immediately following | ||
35 | * the specified sector) that are known to be in the same | ||
36 | * allocated/unallocated state. | ||
37 | * | ||
38 | * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes | ||
39 | - * beyond the end of the disk image it will be clamped. | ||
40 | + * beyond the end of the disk image it will be clamped; if 'pnum' is set to | ||
41 | + * the end of the image, then the returned value will include BDRV_BLOCK_EOF. | ||
42 | * | ||
43 | * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file' | ||
44 | * points to the BDS which the sector range is allocated in. | ||
45 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, | ||
46 | |||
47 | if (sector_num >= total_sectors) { | ||
48 | *pnum = 0; | ||
49 | - return 0; | ||
50 | + return BDRV_BLOCK_EOF; | ||
51 | } | ||
52 | |||
53 | n = total_sectors - sector_num; | ||
54 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, | ||
55 | if (!bs->drv->bdrv_co_get_block_status) { | ||
56 | *pnum = nb_sectors; | ||
57 | ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; | ||
58 | + if (sector_num + nb_sectors == total_sectors) { | ||
59 | + ret |= BDRV_BLOCK_EOF; | ||
60 | + } | ||
61 | if (bs->drv->protocol_name) { | ||
62 | ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); | ||
63 | } | ||
64 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, | ||
65 | |||
66 | out: | ||
67 | bdrv_dec_in_flight(bs); | ||
68 | + if (ret >= 0 && sector_num + *pnum == total_sectors) { | ||
69 | + ret |= BDRV_BLOCK_EOF; | ||
70 | + } | ||
71 | return ret; | ||
72 | } | ||
73 | |||
74 | diff --git a/include/block/block.h b/include/block/block.h | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/include/block/block.h | ||
77 | +++ b/include/block/block.h | ||
78 | @@ -XXX,XX +XXX,XX @@ typedef struct HDGeometry { | ||
79 | * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data | ||
80 | * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this | ||
81 | * layer (short for DATA || ZERO), set by block layer | ||
82 | + * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer | ||
83 | * | ||
84 | * Internal flag: | ||
85 | * BDRV_BLOCK_RAW: used internally to indicate that the request was | ||
86 | @@ -XXX,XX +XXX,XX @@ typedef struct HDGeometry { | ||
87 | #define BDRV_BLOCK_OFFSET_VALID 0x04 | ||
88 | #define BDRV_BLOCK_RAW 0x08 | ||
89 | #define BDRV_BLOCK_ALLOCATED 0x10 | ||
90 | +#define BDRV_BLOCK_EOF 0x20 | ||
91 | #define BDRV_BLOCK_OFFSET_MASK BDRV_SECTOR_MASK | ||
92 | |||
93 | typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; | ||
26 | -- | 94 | -- |
27 | 2.9.3 | 95 | 2.9.4 |
28 | 96 | ||
29 | 97 | diff view generated by jsdifflib |
1 | From: Changlong Xie <xiecl.fnst@cn.fujitsu.com> | 1 | From: Eric Blake <eblake@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | So he can get CC'ed on future patches and bugs for this feature | 3 | When we have a BDS with unallocated clusters, but asking the status |
4 | of its underlying bs->file or backing layer encounters an end-of-file | ||
5 | condition, we know that the rest of the unallocated area will read as | ||
6 | zeroes. However, pre-patch, this required two separate calls to | ||
7 | bdrv_get_block_status(), as the first call stops at the point where | ||
8 | the underlying file ends. Thanks to BDRV_BLOCK_EOF, we can now widen | ||
9 | the results of the primary status if the secondary status already | ||
10 | includes BDRV_BLOCK_ZERO. | ||
4 | 11 | ||
5 | Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com> | 12 | In turn, this fixes a TODO mentioned in iotest 154, where we can now |
6 | Message-id: 1492484893-23435-1-git-send-email-xiecl.fnst@cn.fujitsu.com | 13 | see that all sectors in a partial cluster at the end of a file read |
7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 14 | as zero when coupling the shorter backing file's status along with our |
15 | knowledge that the remaining sectors came from an unallocated cluster. | ||
16 | |||
17 | Also, note that the loop in bdrv_co_get_block_status_above() had an | ||
18 | inefficient exit: in cases where the active layer sets BDRV_BLOCK_ZERO | ||
19 | but does NOT set BDRV_BLOCK_ALLOCATED (namely, where we know we read | ||
20 | zeroes merely because our unallocated clusters lie beyond the backing | ||
21 | file's shorter length), we still ended up probing the backing layer | ||
22 | even though we already had a good answer. | ||
23 | |||
24 | Signed-off-by: Eric Blake <eblake@redhat.com> | ||
25 | Message-Id: <20170505021500.19315-3-eblake@redhat.com> | ||
26 | Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
27 | Signed-off-by: Fam Zheng <famz@redhat.com> | ||
8 | --- | 28 | --- |
9 | MAINTAINERS | 2 +- | 29 | block/io.c | 27 ++++++++++++++++++++++----- |
10 | 1 file changed, 1 insertion(+), 1 deletion(-) | 30 | tests/qemu-iotests/154 | 4 ---- |
31 | tests/qemu-iotests/154.out | 12 ++++++------ | ||
32 | 3 files changed, 28 insertions(+), 15 deletions(-) | ||
11 | 33 | ||
12 | diff --git a/MAINTAINERS b/MAINTAINERS | 34 | diff --git a/block/io.c b/block/io.c |
13 | index XXXXXXX..XXXXXXX 100644 | 35 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/MAINTAINERS | 36 | --- a/block/io.c |
15 | +++ b/MAINTAINERS | 37 | +++ b/block/io.c |
16 | @@ -XXX,XX +XXX,XX @@ S: Supported | 38 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, |
17 | F: tests/image-fuzzer/ | 39 | /* Ignore errors. This is just providing extra information, it |
18 | 40 | * is useful but not necessary. | |
19 | Replication | 41 | */ |
20 | -M: Wen Congyang <wency@cn.fujitsu.com> | 42 | - if (!file_pnum) { |
21 | +M: Wen Congyang <wencongyang2@huawei.com> | 43 | - /* !file_pnum indicates an offset at or beyond the EOF; it is |
22 | M: Changlong Xie <xiecl.fnst@cn.fujitsu.com> | 44 | - * perfectly valid for the format block driver to point to such |
23 | S: Supported | 45 | - * offsets, so catch it and mark everything as zero */ |
24 | F: replication* | 46 | + if (ret2 & BDRV_BLOCK_EOF && |
47 | + (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) { | ||
48 | + /* | ||
49 | + * It is valid for the format block driver to read | ||
50 | + * beyond the end of the underlying file's current | ||
51 | + * size; such areas read as zero. | ||
52 | + */ | ||
53 | ret |= BDRV_BLOCK_ZERO; | ||
54 | } else { | ||
55 | /* Limit request to the range reported by the protocol driver */ | ||
56 | @@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs, | ||
57 | { | ||
58 | BlockDriverState *p; | ||
59 | int64_t ret = 0; | ||
60 | + bool first = true; | ||
61 | |||
62 | assert(bs != base); | ||
63 | for (p = bs; p != base; p = backing_bs(p)) { | ||
64 | ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file); | ||
65 | - if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) { | ||
66 | + if (ret < 0) { | ||
67 | + break; | ||
68 | + } | ||
69 | + if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) { | ||
70 | + /* | ||
71 | + * Reading beyond the end of the file continues to read | ||
72 | + * zeroes, but we can only widen the result to the | ||
73 | + * unallocated length we learned from an earlier | ||
74 | + * iteration. | ||
75 | + */ | ||
76 | + *pnum = nb_sectors; | ||
77 | + } | ||
78 | + if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) { | ||
79 | break; | ||
80 | } | ||
81 | /* [sector_num, pnum] unallocated on this layer, which could be only | ||
82 | * the first part of [sector_num, nb_sectors]. */ | ||
83 | nb_sectors = MIN(nb_sectors, *pnum); | ||
84 | + first = false; | ||
85 | } | ||
86 | return ret; | ||
87 | } | ||
88 | diff --git a/tests/qemu-iotests/154 b/tests/qemu-iotests/154 | ||
89 | index XXXXXXX..XXXXXXX 100755 | ||
90 | --- a/tests/qemu-iotests/154 | ||
91 | +++ b/tests/qemu-iotests/154 | ||
92 | @@ -XXX,XX +XXX,XX @@ $QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io | ||
93 | $QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map | ||
94 | |||
95 | # Repeat with backing file holding unallocated cluster. | ||
96 | -# TODO: Note that this forces an allocation, because we aren't yet able to | ||
97 | -# quickly detect that reads beyond EOF of the backing file are always zero | ||
98 | CLUSTER_SIZE=2048 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024)) | ||
99 | |||
100 | # Write at the front: sector-wise, the request is: | ||
101 | @@ -XXX,XX +XXX,XX @@ $QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io | ||
102 | $QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map | ||
103 | |||
104 | # Repeat with backing file holding zero'd cluster | ||
105 | -# TODO: Note that this forces an allocation, because we aren't yet able to | ||
106 | -# quickly detect that reads beyond EOF of the backing file are always zero | ||
107 | $QEMU_IO -c "write -z $size 512" "$TEST_IMG.base" | _filter_qemu_io | ||
108 | |||
109 | # Write at the front: sector-wise, the request is: | ||
110 | diff --git a/tests/qemu-iotests/154.out b/tests/qemu-iotests/154.out | ||
111 | index XXXXXXX..XXXXXXX 100644 | ||
112 | --- a/tests/qemu-iotests/154.out | ||
113 | +++ b/tests/qemu-iotests/154.out | ||
114 | @@ -XXX,XX +XXX,XX @@ wrote 512/512 bytes at offset 134217728 | ||
115 | 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
116 | 2048/2048 bytes allocated at offset 128 MiB | ||
117 | [{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, | ||
118 | -{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] | ||
119 | +{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}] | ||
120 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base | ||
121 | wrote 512/512 bytes at offset 134219264 | ||
122 | 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
123 | 2048/2048 bytes allocated at offset 128 MiB | ||
124 | [{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, | ||
125 | -{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] | ||
126 | +{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}] | ||
127 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base | ||
128 | wrote 1024/1024 bytes at offset 134218240 | ||
129 | 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
130 | 2048/2048 bytes allocated at offset 128 MiB | ||
131 | [{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, | ||
132 | -{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] | ||
133 | +{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}] | ||
134 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base | ||
135 | wrote 2048/2048 bytes at offset 134217728 | ||
136 | 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
137 | @@ -XXX,XX +XXX,XX @@ wrote 512/512 bytes at offset 134217728 | ||
138 | 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
139 | 2048/2048 bytes allocated at offset 128 MiB | ||
140 | [{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, | ||
141 | -{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] | ||
142 | +{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}] | ||
143 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base | ||
144 | wrote 512/512 bytes at offset 134219264 | ||
145 | 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
146 | 2048/2048 bytes allocated at offset 128 MiB | ||
147 | [{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, | ||
148 | -{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] | ||
149 | +{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}] | ||
150 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base | ||
151 | wrote 1024/1024 bytes at offset 134218240 | ||
152 | 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
153 | 2048/2048 bytes allocated at offset 128 MiB | ||
154 | [{ "start": 0, "length": 134217728, "depth": 1, "zero": true, "data": false}, | ||
155 | -{ "start": 134217728, "length": 2048, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] | ||
156 | +{ "start": 134217728, "length": 2048, "depth": 0, "zero": true, "data": false}] | ||
157 | Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134219776 backing_file=TEST_DIR/t.IMGFMT.base | ||
158 | wrote 2048/2048 bytes at offset 134217728 | ||
159 | 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) | ||
25 | -- | 160 | -- |
26 | 2.9.3 | 161 | 2.9.4 |
27 | 162 | ||
28 | 163 | diff view generated by jsdifflib |