1
The following changes since commit 20d6c7312f1b812bb9c750f4087f69ac8485cc90:
1
The following changes since commit 3f3bbfc7cef4490c5ed5550766a81e7d18f08db1:
2
2
3
Merge remote-tracking branch 'remotes/palmer/tags/riscv-for-master-3.2-part1' into staging (2019-01-03 13:26:30 +0000)
3
Merge remote-tracking branch 'remotes/huth-gitlab/tags/pull-request-2019-03-12' into staging (2019-03-12 21:06:26 +0000)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/stefanha/qemu.git tags/block-pull-request
7
git://github.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 39a0408e768cd00142f5b57d27ab234282bf4df5:
9
for you to fetch changes up to f357fcd890a8d6ced6d261338b859a41414561e9:
10
10
11
dmg: don't skip zero chunk (2019-01-04 11:15:09 +0000)
11
file-posix: add drop-cache=on|off option (2019-03-13 10:54:55 +0000)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request
14
Pull request
15
15
16
Bug fixes for the .dmg image file format.
16
* Add 'drop-cache=on|off' option to file-posix.c. The default is on.
17
Disabling the option fixes a QEMU 3.0.0 performance regression when live
18
migrating on the same host with cache.direct=off.
17
19
18
----------------------------------------------------------------
20
----------------------------------------------------------------
19
21
20
Julio Faracco (1):
22
Stefan Hajnoczi (1):
21
dmg: Fixing wrong dmg block type value for block terminator.
23
file-posix: add drop-cache=on|off option
22
24
23
yuchenlin (3):
25
qapi/block-core.json | 6 ++++++
24
dmg: fix binary search
26
block/file-posix.c | 16 ++++++++++++++++
25
dmg: use enumeration type instead of hard coding number
27
2 files changed, 22 insertions(+)
26
dmg: don't skip zero chunk
27
28
block/dmg.c | 31 ++++++++++++++++++++-----------
29
1 file changed, 20 insertions(+), 11 deletions(-)
30
28
31
--
29
--
32
2.20.1
30
2.20.1
33
31
34
32
diff view generated by jsdifflib
Deleted patch
1
From: Julio Faracco <jcfaracco@gmail.com>
2
1
3
This is a trivial patch to fix a wrong value for block terminator.
4
The old value was 0x7fffffff which is wrong. It was not affecting the
5
code because QEMU dmg block is not handling block terminator right now.
6
Nevertheless, it should be fixed.
7
8
Signed-off-by: Julio Faracco <jcfaracco@gmail.com>
9
Reviewed-by: yuchenlin <yuchenlin@synology.com>
10
Message-id: 20181228145055.18039-1-jcfaracco@gmail.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
block/dmg.c | 2 +-
14
1 file changed, 1 insertion(+), 1 deletion(-)
15
16
diff --git a/block/dmg.c b/block/dmg.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/dmg.c
19
+++ b/block/dmg.c
20
@@ -XXX,XX +XXX,XX @@ enum {
21
UDBZ,
22
ULFO,
23
UDCM = 0x7ffffffe, /* Comments */
24
- UDLE /* Last Entry */
25
+ UDLE = 0xffffffff /* Last Entry */
26
};
27
28
static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
29
--
30
2.20.1
31
32
diff view generated by jsdifflib
1
From: yuchenlin <npes87184@gmail.com>
1
Commit dd577a26ff03b6829721b1ffbbf9e7c411b72378 ("block/file-posix:
2
implement bdrv_co_invalidate_cache() on Linux") introduced page cache
3
invalidation so that cache.direct=off live migration is safe on Linux.
2
4
3
There is a possible hang in the original binary search implementation. That is,
5
The invalidation takes a significant amount of time when the file is
4
if chunk1 = 4, chunk2 = 5, chunk3 = 4, and we take the else branch.
6
large and present in the page cache. Normally this is not the case for
7
cross-host live migration but it can happen when migrating between QEMU
8
processes on the same host.
5
9
6
chunk1 will still be 4, and so on.
10
On same-host migration we don't need to invalidate pages for correctness
11
anyway, so an option to skip page cache invalidation is useful. I
12
investigated optimizing invalidation and detecting same-host migration,
13
but both are hard to achieve so a user-visible option will suffice.
7
14
8
Signed-off-by: yuchenlin <npes87184@gmail.com>
15
As a bonus this option means that the cache invalidation feature will
9
Message-id: 20190103114700.9686-2-npes87184@gmail.com
16
now be detectable by libvirt via QMP schema introspection.
17
18
Suggested-by: Neil Skrypuch <neil@tembosocial.com>
19
Tested-by: Neil Skrypuch <neil@tembosocial.com>
20
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
21
Reviewed-by: Eric Blake <eblake@redhat.com>
22
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
23
Message-id: 20190307164941.3322-1-stefanha@redhat.com
24
Message-Id: <20190307164941.3322-1-stefanha@redhat.com>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
25
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
26
---
12
block/dmg.c | 10 +++++++---
27
qapi/block-core.json | 6 ++++++
13
1 file changed, 7 insertions(+), 3 deletions(-)
28
block/file-posix.c | 16 ++++++++++++++++
29
2 files changed, 22 insertions(+)
14
30
15
diff --git a/block/dmg.c b/block/dmg.c
31
diff --git a/qapi/block-core.json b/qapi/block-core.json
16
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
17
--- a/block/dmg.c
33
--- a/qapi/block-core.json
18
+++ b/block/dmg.c
34
+++ b/qapi/block-core.json
19
@@ -XXX,XX +XXX,XX @@ static inline uint32_t search_chunk(BDRVDMGState *s, uint64_t sector_num)
35
@@ -XXX,XX +XXX,XX @@
20
{
36
# @locking: whether to enable file locking. If set to 'auto', only enable
21
/* binary search */
37
# when Open File Descriptor (OFD) locking API is available
22
uint32_t chunk1 = 0, chunk2 = s->n_chunks, chunk3;
38
# (default: auto, since 2.10)
23
- while (chunk1 != chunk2) {
39
+# @drop-cache: invalidate page cache during live migration. This prevents
24
+ while (chunk1 <= chunk2) {
40
+# stale data on the migration destination with cache.direct=off.
25
chunk3 = (chunk1 + chunk2) / 2;
41
+# Currently only supported on Linux hosts.
26
if (s->sectors[chunk3] > sector_num) {
42
+# (default: on, since: 4.0)
27
- chunk2 = chunk3;
43
# @x-check-cache-dropped: whether to check that page cache was dropped on live
28
+ if (chunk3 == 0) {
44
# migration. May cause noticeable delays if the image
29
+ goto err;
45
# file is large, do not use in production.
30
+ }
46
@@ -XXX,XX +XXX,XX @@
31
+ chunk2 = chunk3 - 1;
47
'*pr-manager': 'str',
32
} else if (s->sectors[chunk3] + s->sectorcounts[chunk3] > sector_num) {
48
'*locking': 'OnOffAuto',
33
return chunk3;
49
'*aio': 'BlockdevAioOptions',
34
} else {
50
+     '*drop-cache': {'type': 'bool',
35
- chunk1 = chunk3;
51
+     'if': 'defined(CONFIG_LINUX)'},
36
+ chunk1 = chunk3 + 1;
52
'*x-check-cache-dropped': 'bool' } }
53
54
##
55
diff --git a/block/file-posix.c b/block/file-posix.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/block/file-posix.c
58
+++ b/block/file-posix.c
59
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVRawState {
60
bool page_cache_inconsistent:1;
61
bool has_fallocate;
62
bool needs_alignment;
63
+ bool drop_cache;
64
bool check_cache_dropped;
65
66
PRManager *pr_mgr;
67
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVRawState {
68
typedef struct BDRVRawReopenState {
69
int fd;
70
int open_flags;
71
+ bool drop_cache;
72
bool check_cache_dropped;
73
} BDRVRawReopenState;
74
75
@@ -XXX,XX +XXX,XX @@ static QemuOptsList raw_runtime_opts = {
76
.type = QEMU_OPT_STRING,
77
.help = "id of persistent reservation manager object (default: none)",
78
},
79
+#if defined(__linux__)
80
+ {
81
+ .name = "drop-cache",
82
+ .type = QEMU_OPT_BOOL,
83
+ .help = "invalidate page cache during live migration (default: on)",
84
+ },
85
+#endif
86
{
87
.name = "x-check-cache-dropped",
88
.type = QEMU_OPT_BOOL,
89
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
37
}
90
}
38
}
91
}
39
+err:
92
40
return s->n_chunks; /* error */
93
+ s->drop_cache = qemu_opt_get_bool(opts, "drop-cache", true);
41
}
94
s->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped",
42
95
false);
96
97
@@ -XXX,XX +XXX,XX @@ static int raw_reopen_prepare(BDRVReopenState *state,
98
goto out;
99
}
100
101
+ rs->drop_cache = qemu_opt_get_bool_del(opts, "drop-cache", true);
102
rs->check_cache_dropped =
103
qemu_opt_get_bool_del(opts, "x-check-cache-dropped", false);
104
105
@@ -XXX,XX +XXX,XX @@ static void raw_reopen_commit(BDRVReopenState *state)
106
BDRVRawState *s = state->bs->opaque;
107
Error *local_err = NULL;
108
109
+ s->drop_cache = rs->drop_cache;
110
s->check_cache_dropped = rs->check_cache_dropped;
111
s->open_flags = rs->open_flags;
112
113
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs,
114
return;
115
}
116
117
+ if (!s->drop_cache) {
118
+ return;
119
+ }
120
+
121
if (s->open_flags & O_DIRECT) {
122
return; /* No host kernel page cache */
123
}
43
--
124
--
44
2.20.1
125
2.20.1
45
126
46
127
diff view generated by jsdifflib
Deleted patch
1
From: yuchenlin <npes87184@gmail.com>
2
1
3
Signed-off-by: yuchenlin <npes87184@gmail.com>
4
Reviewed-by: Julio Faracco <jcfaracco@gmail.com>
5
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20190103114700.9686-3-npes87184@gmail.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
block/dmg.c | 4 ++--
10
1 file changed, 2 insertions(+), 2 deletions(-)
11
12
diff --git a/block/dmg.c b/block/dmg.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/block/dmg.c
15
+++ b/block/dmg.c
16
@@ -XXX,XX +XXX,XX @@ static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
17
18
/* all-zeroes sector (type 2) does not need to be "uncompressed" and can
19
* therefore be unbounded. */
20
- if (s->types[i] != 2 && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
21
+ if (s->types[i] != UDIG && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
22
error_report("sector count %" PRIu64 " for chunk %" PRIu32
23
" is larger than max (%u)",
24
s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
25
@@ -XXX,XX +XXX,XX @@ dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
26
/* Special case: current chunk is all zeroes. Do not perform a memcpy as
27
* s->uncompressed_chunk may be too small to cover the large all-zeroes
28
* section. dmg_read_chunk is called to find s->current_chunk */
29
- if (s->types[s->current_chunk] == 2) { /* all zeroes block entry */
30
+ if (s->types[s->current_chunk] == UDIG) { /* all zeroes block entry */
31
qemu_iovec_memset(qiov, i * 512, 0, 512);
32
continue;
33
}
34
--
35
2.20.1
36
37
diff view generated by jsdifflib
Deleted patch
1
From: yuchenlin <npes87184@gmail.com>
2
1
3
The dmg file has many tables which describe: "start from sector XXX to
4
sector XXX, the compression method is XXX and where the compressed data
5
resides on".
6
7
Each sector in the expanded file should be covered by a table. The table
8
will describe the offset of the compressed data (or raw data, depending on the type)
9
in the dmg.
10
11
For example:
12
13
[-----------The expanded file------------]
14
[---bzip table ---]/* zeros */[---zlib---]
15
^
16
| if we want to read this sector.
17
18
we will find the bzip table that contains this sector, get the
19
compressed data offset, read it from the dmg, uncompress it, and finally write it to
20
the expanded file.
21
22
If we skip the zero chunk (table), some sectors will not be covered by any table, which
23
will cause search_chunk() to return s->n_chunks, dmg_read_chunk() to return -1
24
and finally dmg_co_preadv() to return EIO.
25
26
See:
27
28
[-----------The expanded file------------]
29
[---bzip table ---]/* zeros */[---zlib---]
30
^
31
| if we want to read this sector.
32
33
Oops, we cannot find the table that contains it...
34
35
In the original implementation, we don't have a zero table. When we try to
36
read a sector inside the zero chunk, we will get EIO and skip reading.
37
38
After this patch, we treat the zero chunk the same as the ignore chunk: it will
39
directly write zeros, which avoids sectors for which no table can be found.
40
41
After this patch:
42
43
[-----------The expanded file------------]
44
[---bzip table ---][--zeros--][---zlib---]
45
46
Signed-off-by: yuchenlin <npes87184@gmail.com>
47
Reviewed-by: Julio Faracco <jcfaracco@gmail.com>
48
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
49
Message-id: 20190103114700.9686-4-npes87184@gmail.com
50
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
51
---
52
block/dmg.c | 19 ++++++++++++-------
53
1 file changed, 12 insertions(+), 7 deletions(-)
54
55
diff --git a/block/dmg.c b/block/dmg.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/block/dmg.c
58
+++ b/block/dmg.c
59
@@ -XXX,XX +XXX,XX @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
60
case UDRW: /* copy */
61
uncompressed_sectors = DIV_ROUND_UP(s->lengths[chunk], 512);
62
break;
63
- case UDIG: /* zero */
64
+ case UDZE: /* zero */
65
+ case UDIG: /* ignore */
66
/* as the all-zeroes block may be large, it is treated specially: the
67
* sector is not copied from a large buffer, a simple memset is used
68
* instead. Therefore uncompressed_sectors does not need to be set. */
69
@@ -XXX,XX +XXX,XX @@ typedef struct DmgHeaderState {
70
static bool dmg_is_known_block_type(uint32_t entry_type)
71
{
72
switch (entry_type) {
73
+ case UDZE: /* zeros */
74
case UDRW: /* uncompressed */
75
- case UDIG: /* zeroes */
76
+ case UDIG: /* ignore */
77
case UDZO: /* zlib */
78
return true;
79
case UDBZ: /* bzip2 */
80
@@ -XXX,XX +XXX,XX @@ static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
81
/* sector count */
82
s->sectorcounts[i] = buff_read_uint64(buffer, offset + 0x10);
83
84
- /* all-zeroes sector (type 2) does not need to be "uncompressed" and can
85
- * therefore be unbounded. */
86
- if (s->types[i] != UDIG && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
87
+ /* all-zeroes sector (type UDZE and UDIG) does not need to be
88
+ * "uncompressed" and can therefore be unbounded. */
89
+ if (s->types[i] != UDZE && s->types[i] != UDIG
90
+ && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
91
error_report("sector count %" PRIu64 " for chunk %" PRIu32
92
" is larger than max (%u)",
93
s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
94
@@ -XXX,XX +XXX,XX @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
95
return -1;
96
}
97
break;
98
- case UDIG: /* zero */
99
+ case UDZE: /* zeros */
100
+ case UDIG: /* ignore */
101
/* see dmg_read, it is treated specially. No buffer needs to be
102
* pre-filled, the zeroes can be set directly. */
103
break;
104
@@ -XXX,XX +XXX,XX @@ dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
105
/* Special case: current chunk is all zeroes. Do not perform a memcpy as
106
* s->uncompressed_chunk may be too small to cover the large all-zeroes
107
* section. dmg_read_chunk is called to find s->current_chunk */
108
- if (s->types[s->current_chunk] == UDIG) { /* all zeroes block entry */
109
+ if (s->types[s->current_chunk] == UDZE
110
+ || s->types[s->current_chunk] == UDIG) { /* all zeroes block entry */
111
qemu_iovec_memset(qiov, i * 512, 0, 512);
112
continue;
113
}
114
--
115
2.20.1
116
117
diff view generated by jsdifflib