1
The following changes since commit a73549f99612f758dec0fdea6ae1c30b6c709a0b:
1
The following changes since commit 39e19f5f67d925c60278a6156fd1776d04495a93:
2
2
3
Merge remote-tracking branch 'remotes/kraxel/tags/ui-20181012-pull-request' into staging (2018-10-12 16:45:51 +0100)
3
Merge tag 'pull-xen-20220705' of https://xenbits.xen.org/git-http/people/aperard/qemu-dm into staging (2022-07-05 22:13:51 +0530)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to a7ec0077c2db445d6bae421963188367d2695bd6:
9
for you to fetch changes up to a495eba03c31c96d6a0817b13598ce2219326691:
10
10
11
qemu-options: Fix bad "macaddr" property in the documentation (2018-10-15 16:14:15 +0800)
11
ebpf: replace deprecated bpf_program__set_socket_filter (2022-07-06 11:39:09 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
----------------------------------------------------------------
15
----------------------------------------------------------------
16
Jason Wang (4):
16
Ding Hui (1):
17
ne2000: fix possible out of bound access in ne2000_receive
17
e1000: set RX descriptor status in a separate operation
18
rtl8139: fix possible out of bound access
19
pcnet: fix possible buffer overflow
20
net: ignore packet size greater than INT_MAX
21
18
22
Martin Wilck (1):
19
Haochen Tong (1):
23
e1000: indicate dropped packets in HW counters
20
ebpf: replace deprecated bpf_program__set_socket_filter
24
21
25
Thomas Huth (1):
22
ebpf/ebpf_rss.c | 2 +-
26
qemu-options: Fix bad "macaddr" property in the documentation
23
hw/net/e1000.c | 5 ++++-
27
24
2 files changed, 5 insertions(+), 2 deletions(-)
28
Zhang Chen (15):
29
filter-rewriter: Add TCP state machine and fix memory leak in connection_track_table
30
colo-compare: implement the process of checkpoint
31
colo-compare: use notifier to notify packets comparing result
32
COLO: integrate colo compare with colo frame
33
COLO: Add block replication into colo process
34
COLO: Remove colo_state migration struct
35
COLO: Load dirty pages into SVM's RAM cache firstly
36
ram/COLO: Record the dirty pages that SVM received
37
COLO: Flush memory data from ram cache
38
qapi/migration.json: Rename COLO unknown mode to none mode.
39
qapi: Add new command to query colo status
40
savevm: split the process of different stages for loadvm/savevm
41
filter: Add handle_event method for NetFilterClass
42
filter-rewriter: handle checkpoint and failover event
43
docs: Add COLO status diagram to COLO-FT.txt
44
45
liujunjie (1):
46
clean up callback when del virtqueue
47
48
zhanghailiang (4):
49
qmp event: Add COLO_EXIT event to notify users while exited COLO
50
COLO: flush host dirty ram from cache
51
COLO: notify net filters about checkpoint/failover event
52
COLO: quick failover process by kick COLO thread
53
54
docs/COLO-FT.txt | 34 ++++++++
55
hw/net/e1000.c | 16 +++-
56
hw/net/ne2000.c | 4 +-
57
hw/net/pcnet.c | 4 +-
58
hw/net/rtl8139.c | 8 +-
59
hw/net/trace-events | 3 +
60
hw/virtio/virtio.c | 2 +
61
include/exec/ram_addr.h | 1 +
62
include/migration/colo.h | 11 ++-
63
include/net/filter.h | 5 ++
64
migration/Makefile.objs | 2 +-
65
migration/colo-comm.c | 76 -----------------
66
migration/colo-failover.c | 2 +-
67
migration/colo.c | 212 +++++++++++++++++++++++++++++++++++++++++++---
68
migration/migration.c | 46 ++++++++--
69
migration/ram.c | 166 +++++++++++++++++++++++++++++++++++-
70
migration/ram.h | 4 +
71
migration/savevm.c | 53 ++++++++++--
72
migration/savevm.h | 5 ++
73
migration/trace-events | 3 +
74
net/colo-compare.c | 115 ++++++++++++++++++++++---
75
net/colo-compare.h | 24 ++++++
76
net/colo.c | 10 ++-
77
net/colo.h | 11 +--
78
net/filter-rewriter.c | 166 +++++++++++++++++++++++++++++++++---
79
net/filter.c | 17 ++++
80
net/net.c | 26 +++++-
81
qapi/migration.json | 80 +++++++++++++++--
82
qemu-options.hx | 2 +-
83
vl.c | 2 -
84
30 files changed, 958 insertions(+), 152 deletions(-)
85
delete mode 100644 migration/colo-comm.c
86
create mode 100644 net/colo-compare.h
87
88
89
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
We add an almost full TCP state machine to filter-rewriter, except
4
TCPS_LISTEN and some simplifications in the VM active-close FIN states.
5
The reason for this simplification is that the guest kernel will track
6
the TCP status and wait 2MSL time too; if the client resends the FIN packet,
7
the guest will resend the last ACK, so we needn't wait 2MSL time in filter-rewriter.
8
9
After a net connection is closed, we didn't clear its related resources
10
in connection_track_table, which leads to a memory leak.
11
12
Let's track the state of the net connection; once it is closed, its related
13
resources will be cleared up.
14
15
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
16
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
17
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
18
Signed-off-by: Jason Wang <jasowang@redhat.com>
19
---
20
net/colo.c | 2 +-
21
net/colo.h | 9 ++---
22
net/filter-rewriter.c | 109 +++++++++++++++++++++++++++++++++++++++++++++-----
23
3 files changed, 104 insertions(+), 16 deletions(-)
24
25
diff --git a/net/colo.c b/net/colo.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/net/colo.c
28
+++ b/net/colo.c
29
@@ -XXX,XX +XXX,XX @@ Connection *connection_new(ConnectionKey *key)
30
conn->ip_proto = key->ip_proto;
31
conn->processing = false;
32
conn->offset = 0;
33
- conn->syn_flag = 0;
34
+ conn->tcp_state = TCPS_CLOSED;
35
conn->pack = 0;
36
conn->sack = 0;
37
g_queue_init(&conn->primary_list);
38
diff --git a/net/colo.h b/net/colo.h
39
index XXXXXXX..XXXXXXX 100644
40
--- a/net/colo.h
41
+++ b/net/colo.h
42
@@ -XXX,XX +XXX,XX @@
43
#include "slirp/slirp.h"
44
#include "qemu/jhash.h"
45
#include "qemu/timer.h"
46
+#include "slirp/tcp.h"
47
48
#define HASHTABLE_MAX_SIZE 16384
49
50
@@ -XXX,XX +XXX,XX @@ typedef struct Connection {
51
uint32_t sack;
52
/* offset = secondary_seq - primary_seq */
53
tcp_seq offset;
54
- /*
55
- * we use this flag update offset func
56
- * run once in independent tcp connection
57
- */
58
- int syn_flag;
59
+
60
+ int tcp_state; /* TCP FSM state */
61
+ tcp_seq fin_ack_seq; /* the seq of 'fin=1,ack=1' */
62
} Connection;
63
64
uint32_t connection_key_hash(const void *opaque);
65
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/net/filter-rewriter.c
68
+++ b/net/filter-rewriter.c
69
@@ -XXX,XX +XXX,XX @@ static int is_tcp_packet(Packet *pkt)
70
}
71
72
/* handle tcp packet from primary guest */
73
-static int handle_primary_tcp_pkt(NetFilterState *nf,
74
+static int handle_primary_tcp_pkt(RewriterState *rf,
75
Connection *conn,
76
- Packet *pkt)
77
+ Packet *pkt, ConnectionKey *key)
78
{
79
struct tcphdr *tcp_pkt;
80
81
@@ -XXX,XX +XXX,XX @@ static int handle_primary_tcp_pkt(NetFilterState *nf,
82
trace_colo_filter_rewriter_conn_offset(conn->offset);
83
}
84
85
+ if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) &&
86
+ conn->tcp_state == TCPS_SYN_SENT) {
87
+ conn->tcp_state = TCPS_ESTABLISHED;
88
+ }
89
+
90
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
91
/*
92
* we use this flag update offset func
93
* run once in independent tcp connection
94
*/
95
- conn->syn_flag = 1;
96
+ conn->tcp_state = TCPS_SYN_RECEIVED;
97
}
98
99
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
100
- if (conn->syn_flag) {
101
+ if (conn->tcp_state == TCPS_SYN_RECEIVED) {
102
/*
103
* offset = secondary_seq - primary seq
104
* ack packet sent by guest from primary node,
105
* so we use th_ack - 1 get primary_seq
106
*/
107
conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
108
- conn->syn_flag = 0;
109
+ conn->tcp_state = TCPS_ESTABLISHED;
110
}
111
if (conn->offset) {
112
/* handle packets to the secondary from the primary */
113
@@ -XXX,XX +XXX,XX @@ static int handle_primary_tcp_pkt(NetFilterState *nf,
114
net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
115
pkt->size - pkt->vnet_hdr_len);
116
}
117
+
118
+ /*
119
+ * Passive close step 3
120
+ */
121
+ if ((conn->tcp_state == TCPS_LAST_ACK) &&
122
+ (ntohl(tcp_pkt->th_ack) == (conn->fin_ack_seq + 1))) {
123
+ conn->tcp_state = TCPS_CLOSED;
124
+ g_hash_table_remove(rf->connection_track_table, key);
125
+ }
126
+ }
127
+
128
+ if ((tcp_pkt->th_flags & TH_FIN) == TH_FIN) {
129
+ /*
130
+ * Passive close.
131
+ * Step 1:
132
+ * The *server* side of this connect is VM, *client* tries to close
133
+ * the connection. We will into CLOSE_WAIT status.
134
+ *
135
+ * Step 2:
136
+ * In this step we will into LAST_ACK status.
137
+ *
138
+ * We got 'fin=1, ack=1' packet from server side, we need to
139
+ * record the seq of 'fin=1, ack=1' packet.
140
+ *
141
+ * Step 3:
142
+ * We got 'ack=1' packets from client side, it acks 'fin=1, ack=1'
143
+ * packet from server side. From this point, we can ensure that there
144
+ * will be no packets in the connection, except that, some errors
145
+ * happen between the path of 'filter object' and vNIC, if this rare
146
+ * case really happen, we can still create a new connection,
147
+ * So it is safe to remove the connection from connection_track_table.
148
+ *
149
+ */
150
+ if (conn->tcp_state == TCPS_ESTABLISHED) {
151
+ conn->tcp_state = TCPS_CLOSE_WAIT;
152
+ }
153
+
154
+ /*
155
+ * Active close step 2.
156
+ */
157
+ if (conn->tcp_state == TCPS_FIN_WAIT_1) {
158
+ conn->tcp_state = TCPS_TIME_WAIT;
159
+ /*
160
+ * For simplify implementation, we needn't wait 2MSL time
161
+ * in filter rewriter. Because guest kernel will track the
162
+ * TCP status and wait 2MSL time, if client resend the FIN
163
+ * packet, guest will apply the last ACK too.
164
+ */
165
+ conn->tcp_state = TCPS_CLOSED;
166
+ g_hash_table_remove(rf->connection_track_table, key);
167
+ }
168
}
169
170
return 0;
171
}
172
173
/* handle tcp packet from secondary guest */
174
-static int handle_secondary_tcp_pkt(NetFilterState *nf,
175
+static int handle_secondary_tcp_pkt(RewriterState *rf,
176
Connection *conn,
177
- Packet *pkt)
178
+ Packet *pkt, ConnectionKey *key)
179
{
180
struct tcphdr *tcp_pkt;
181
182
@@ -XXX,XX +XXX,XX @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
183
trace_colo_filter_rewriter_conn_offset(conn->offset);
184
}
185
186
- if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
187
+ if (conn->tcp_state == TCPS_SYN_RECEIVED &&
188
+ ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
189
/*
190
* save offset = secondary_seq and then
191
* in handle_primary_tcp_pkt make offset
192
@@ -XXX,XX +XXX,XX @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
193
conn->offset = ntohl(tcp_pkt->th_seq);
194
}
195
196
+ /* VM active connect */
197
+ if (conn->tcp_state == TCPS_CLOSED &&
198
+ ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
199
+ conn->tcp_state = TCPS_SYN_SENT;
200
+ }
201
+
202
if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
203
/* Only need to adjust seq while offset is Non-zero */
204
if (conn->offset) {
205
@@ -XXX,XX +XXX,XX @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
206
}
207
}
208
209
+ /*
210
+ * Passive close step 2:
211
+ */
212
+ if (conn->tcp_state == TCPS_CLOSE_WAIT &&
213
+ (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) {
214
+ conn->fin_ack_seq = ntohl(tcp_pkt->th_seq);
215
+ conn->tcp_state = TCPS_LAST_ACK;
216
+ }
217
+
218
+ /*
219
+ * Active close
220
+ *
221
+ * Step 1:
222
+ * The *server* side of this connect is VM, *server* tries to close
223
+ * the connection.
224
+ *
225
+ * Step 2:
226
+ * We will into CLOSE_WAIT status.
227
+ * We simplify the TCPS_FIN_WAIT_2, TCPS_TIME_WAIT and
228
+ * CLOSING status.
229
+ */
230
+ if (conn->tcp_state == TCPS_ESTABLISHED &&
231
+ (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == TH_FIN) {
232
+ conn->tcp_state = TCPS_FIN_WAIT_1;
233
+ }
234
+
235
return 0;
236
}
237
238
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
239
240
if (sender == nf->netdev) {
241
/* NET_FILTER_DIRECTION_TX */
242
- if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
243
+ if (!handle_primary_tcp_pkt(s, conn, pkt, &key)) {
244
qemu_net_queue_send(s->incoming_queue, sender, 0,
245
(const uint8_t *)pkt->data, pkt->size, NULL);
246
packet_destroy(pkt, NULL);
247
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
248
}
249
} else {
250
/* NET_FILTER_DIRECTION_RX */
251
- if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
252
+ if (!handle_secondary_tcp_pkt(s, conn, pkt, &key)) {
253
qemu_net_queue_send(s->incoming_queue, sender, 0,
254
(const uint8_t *)pkt->data, pkt->size, NULL);
255
packet_destroy(pkt, NULL);
256
--
257
2.5.0
258
259
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
While doing a checkpoint, we need to flush all the unhandled packets.
4
By using the filter notifier mechanism, we can easily notify
5
every compare object to do this process, which runs inside
6
of compare threads as a coroutine.
7
8
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
9
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
10
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
include/migration/colo.h | 6 ++++
14
net/colo-compare.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++
15
net/colo-compare.h | 22 ++++++++++++++
16
3 files changed, 106 insertions(+)
17
create mode 100644 net/colo-compare.h
18
19
diff --git a/include/migration/colo.h b/include/migration/colo.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/include/migration/colo.h
22
+++ b/include/migration/colo.h
23
@@ -XXX,XX +XXX,XX @@
24
#include "qemu-common.h"
25
#include "qapi/qapi-types-migration.h"
26
27
+enum colo_event {
28
+ COLO_EVENT_NONE,
29
+ COLO_EVENT_CHECKPOINT,
30
+ COLO_EVENT_FAILOVER,
31
+};
32
+
33
void colo_info_init(void);
34
35
void migrate_start_colo_process(MigrationState *s);
36
diff --git a/net/colo-compare.c b/net/colo-compare.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/net/colo-compare.c
39
+++ b/net/colo-compare.c
40
@@ -XXX,XX +XXX,XX @@
41
#include "qemu/sockets.h"
42
#include "colo.h"
43
#include "sysemu/iothread.h"
44
+#include "net/colo-compare.h"
45
+#include "migration/colo.h"
46
47
#define TYPE_COLO_COMPARE "colo-compare"
48
#define COLO_COMPARE(obj) \
49
OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
50
51
+static QTAILQ_HEAD(, CompareState) net_compares =
52
+ QTAILQ_HEAD_INITIALIZER(net_compares);
53
+
54
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
55
#define MAX_QUEUE_SIZE 1024
56
57
@@ -XXX,XX +XXX,XX @@
58
/* TODO: Should be configurable */
59
#define REGULAR_PACKET_CHECK_MS 3000
60
61
+static QemuMutex event_mtx;
62
+static QemuCond event_complete_cond;
63
+static int event_unhandled_count;
64
+
65
/*
66
* + CompareState ++
67
* | |
68
@@ -XXX,XX +XXX,XX @@ typedef struct CompareState {
69
IOThread *iothread;
70
GMainContext *worker_context;
71
QEMUTimer *packet_check_timer;
72
+
73
+ QEMUBH *event_bh;
74
+ enum colo_event event;
75
+
76
+ QTAILQ_ENTRY(CompareState) next;
77
} CompareState;
78
79
typedef struct CompareClass {
80
@@ -XXX,XX +XXX,XX @@ static void check_old_packet_regular(void *opaque)
81
REGULAR_PACKET_CHECK_MS);
82
}
83
84
+/* Public API, Used for COLO frame to notify compare event */
85
+void colo_notify_compares_event(void *opaque, int event, Error **errp)
86
+{
87
+ CompareState *s;
88
+
89
+ qemu_mutex_lock(&event_mtx);
90
+ QTAILQ_FOREACH(s, &net_compares, next) {
91
+ s->event = event;
92
+ qemu_bh_schedule(s->event_bh);
93
+ event_unhandled_count++;
94
+ }
95
+ /* Wait all compare threads to finish handling this event */
96
+ while (event_unhandled_count > 0) {
97
+ qemu_cond_wait(&event_complete_cond, &event_mtx);
98
+ }
99
+
100
+ qemu_mutex_unlock(&event_mtx);
101
+}
102
+
103
static void colo_compare_timer_init(CompareState *s)
104
{
105
AioContext *ctx = iothread_get_aio_context(s->iothread);
106
@@ -XXX,XX +XXX,XX @@ static void colo_compare_timer_del(CompareState *s)
107
}
108
}
109
110
+static void colo_flush_packets(void *opaque, void *user_data);
111
+
112
+static void colo_compare_handle_event(void *opaque)
113
+{
114
+ CompareState *s = opaque;
115
+
116
+ switch (s->event) {
117
+ case COLO_EVENT_CHECKPOINT:
118
+ g_queue_foreach(&s->conn_list, colo_flush_packets, s);
119
+ break;
120
+ case COLO_EVENT_FAILOVER:
121
+ break;
122
+ default:
123
+ break;
124
+ }
125
+
126
+ assert(event_unhandled_count > 0);
127
+
128
+ qemu_mutex_lock(&event_mtx);
129
+ event_unhandled_count--;
130
+ qemu_cond_broadcast(&event_complete_cond);
131
+ qemu_mutex_unlock(&event_mtx);
132
+}
133
+
134
static void colo_compare_iothread(CompareState *s)
135
{
136
object_ref(OBJECT(s->iothread));
137
@@ -XXX,XX +XXX,XX @@ static void colo_compare_iothread(CompareState *s)
138
s, s->worker_context, true);
139
140
colo_compare_timer_init(s);
141
+ s->event_bh = qemu_bh_new(colo_compare_handle_event, s);
142
}
143
144
static char *compare_get_pri_indev(Object *obj, Error **errp)
145
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
146
net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
147
net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
148
149
+ QTAILQ_INSERT_TAIL(&net_compares, s, next);
150
+
151
g_queue_init(&s->conn_list);
152
153
+ qemu_mutex_init(&event_mtx);
154
+ qemu_cond_init(&event_complete_cond);
155
+
156
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
157
connection_key_equal,
158
g_free,
159
@@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj)
160
static void colo_compare_finalize(Object *obj)
161
{
162
CompareState *s = COLO_COMPARE(obj);
163
+ CompareState *tmp = NULL;
164
165
qemu_chr_fe_deinit(&s->chr_pri_in, false);
166
qemu_chr_fe_deinit(&s->chr_sec_in, false);
167
@@ -XXX,XX +XXX,XX @@ static void colo_compare_finalize(Object *obj)
168
if (s->iothread) {
169
colo_compare_timer_del(s);
170
}
171
+
172
+ qemu_bh_delete(s->event_bh);
173
+
174
+ QTAILQ_FOREACH(tmp, &net_compares, next) {
175
+ if (tmp == s) {
176
+ QTAILQ_REMOVE(&net_compares, s, next);
177
+ break;
178
+ }
179
+ }
180
+
181
/* Release all unhandled packets after compare thead exited */
182
g_queue_foreach(&s->conn_list, colo_flush_packets, s);
183
184
@@ -XXX,XX +XXX,XX @@ static void colo_compare_finalize(Object *obj)
185
if (s->iothread) {
186
object_unref(OBJECT(s->iothread));
187
}
188
+
189
+ qemu_mutex_destroy(&event_mtx);
190
+ qemu_cond_destroy(&event_complete_cond);
191
+
192
g_free(s->pri_indev);
193
g_free(s->sec_indev);
194
g_free(s->outdev);
195
diff --git a/net/colo-compare.h b/net/colo-compare.h
196
new file mode 100644
197
index XXXXXXX..XXXXXXX
198
--- /dev/null
199
+++ b/net/colo-compare.h
200
@@ -XXX,XX +XXX,XX @@
201
+/*
202
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
203
+ * (a.k.a. Fault Tolerance or Continuous Replication)
204
+ *
205
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
206
+ * Copyright (c) 2017 FUJITSU LIMITED
207
+ * Copyright (c) 2017 Intel Corporation
208
+ *
209
+ * Authors:
210
+ * zhanghailiang <zhang.zhanghailiang@huawei.com>
211
+ * Zhang Chen <zhangckid@gmail.com>
212
+ *
213
+ * This work is licensed under the terms of the GNU GPL, version 2 or
214
+ * later. See the COPYING file in the top-level directory.
215
+ */
216
+
217
+#ifndef QEMU_COLO_COMPARE_H
218
+#define QEMU_COLO_COMPARE_H
219
+
220
+void colo_notify_compares_event(void *opaque, int event, Error **errp);
221
+
222
+#endif /* QEMU_COLO_COMPARE_H */
223
--
224
2.5.0
225
226
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
It's a good idea to use a notifier to notify the COLO frame of
4
inconsistent packet comparison results.
5
6
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
7
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
net/colo-compare.c | 37 ++++++++++++++++++++++++++-----------
12
net/colo-compare.h | 2 ++
13
2 files changed, 28 insertions(+), 11 deletions(-)
14
15
diff --git a/net/colo-compare.c b/net/colo-compare.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/net/colo-compare.c
18
+++ b/net/colo-compare.c
19
@@ -XXX,XX +XXX,XX @@
20
#include "sysemu/iothread.h"
21
#include "net/colo-compare.h"
22
#include "migration/colo.h"
23
+#include "migration/migration.h"
24
25
#define TYPE_COLO_COMPARE "colo-compare"
26
#define COLO_COMPARE(obj) \
27
@@ -XXX,XX +XXX,XX @@
28
static QTAILQ_HEAD(, CompareState) net_compares =
29
QTAILQ_HEAD_INITIALIZER(net_compares);
30
31
+static NotifierList colo_compare_notifiers =
32
+ NOTIFIER_LIST_INITIALIZER(colo_compare_notifiers);
33
+
34
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
35
#define MAX_QUEUE_SIZE 1024
36
37
@@ -XXX,XX +XXX,XX @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
38
return false;
39
}
40
41
+static void colo_compare_inconsistency_notify(void)
42
+{
43
+ notifier_list_notify(&colo_compare_notifiers,
44
+ migrate_get_current());
45
+}
46
+
47
static void colo_compare_tcp(CompareState *s, Connection *conn)
48
{
49
Packet *ppkt = NULL, *spkt = NULL;
50
@@ -XXX,XX +XXX,XX @@ sec:
51
qemu_hexdump((char *)spkt->data, stderr,
52
"colo-compare spkt", spkt->size);
53
54
- /*
55
- * colo_compare_inconsistent_notify();
56
- * TODO: notice to checkpoint();
57
- */
58
+ colo_compare_inconsistency_notify();
59
}
60
}
61
62
@@ -XXX,XX +XXX,XX @@ static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
63
}
64
}
65
66
+void colo_compare_register_notifier(Notifier *notify)
67
+{
68
+ notifier_list_add(&colo_compare_notifiers, notify);
69
+}
70
+
71
+void colo_compare_unregister_notifier(Notifier *notify)
72
+{
73
+ notifier_remove(notify);
74
+}
75
+
76
static int colo_old_packet_check_one_conn(Connection *conn,
77
- void *user_data)
78
+ void *user_data)
79
{
80
GList *result = NULL;
81
int64_t check_time = REGULAR_PACKET_CHECK_MS;
82
@@ -XXX,XX +XXX,XX @@ static int colo_old_packet_check_one_conn(Connection *conn,
83
84
if (result) {
85
/* Do checkpoint will flush old packet */
86
- /*
87
- * TODO: Notify colo frame to do checkpoint.
88
- * colo_compare_inconsistent_notify();
89
- */
90
+ colo_compare_inconsistency_notify();
91
return 0;
92
}
93
94
@@ -XXX,XX +XXX,XX @@ static void colo_compare_packet(CompareState *s, Connection *conn,
95
/*
96
* If one packet arrive late, the secondary_list or
97
* primary_list will be empty, so we can't compare it
98
- * until next comparison.
99
+ * until next comparison. If the packets in the list are
100
+ * timeout, it will trigger a checkpoint request.
101
*/
102
trace_colo_compare_main("packet different");
103
g_queue_push_head(&conn->primary_list, pkt);
104
- /* TODO: colo_notify_checkpoint();*/
105
+ colo_compare_inconsistency_notify();
106
break;
107
}
108
}
109
diff --git a/net/colo-compare.h b/net/colo-compare.h
110
index XXXXXXX..XXXXXXX 100644
111
--- a/net/colo-compare.h
112
+++ b/net/colo-compare.h
113
@@ -XXX,XX +XXX,XX @@
114
#define QEMU_COLO_COMPARE_H
115
116
void colo_notify_compares_event(void *opaque, int event, Error **errp);
117
+void colo_compare_register_notifier(Notifier *notify);
118
+void colo_compare_unregister_notifier(Notifier *notify);
119
120
#endif /* QEMU_COLO_COMPARE_H */
121
--
122
2.5.0
123
124
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
For COLO FT, both the PVM and SVM run at the same time,
4
and only sync the state when needed.
5
6
So here, let the SVM run while not doing a checkpoint, and change
7
DEFAULT_MIGRATE_X_CHECKPOINT_DELAY to 200*100.
8
9
Besides, we forgot to release colo_checkpoint_sem and
10
colo_delay_timer, fix them here.
11
12
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
13
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
14
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
15
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
18
migration/colo.c | 42 ++++++++++++++++++++++++++++++++++++++++--
19
migration/migration.c | 6 ++----
20
2 files changed, 42 insertions(+), 6 deletions(-)
21
22
diff --git a/migration/colo.c b/migration/colo.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/migration/colo.c
25
+++ b/migration/colo.c
26
@@ -XXX,XX +XXX,XX @@
27
#include "qemu/error-report.h"
28
#include "migration/failover.h"
29
#include "replication.h"
30
+#include "net/colo-compare.h"
31
+#include "net/colo.h"
32
33
static bool vmstate_loading;
34
+static Notifier packets_compare_notifier;
35
36
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
37
38
@@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s,
39
goto out;
40
}
41
42
+ colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
43
+ if (local_err) {
44
+ goto out;
45
+ }
46
+
47
/* Disable block migration */
48
migrate_set_block_enabled(false, &local_err);
49
qemu_savevm_state_header(fb);
50
@@ -XXX,XX +XXX,XX @@ out:
51
return ret;
52
}
53
54
+static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
55
+{
56
+ colo_checkpoint_notify(data);
57
+}
58
+
59
static void colo_process_checkpoint(MigrationState *s)
60
{
61
QIOChannelBuffer *bioc;
62
@@ -XXX,XX +XXX,XX @@ static void colo_process_checkpoint(MigrationState *s)
63
goto out;
64
}
65
66
+ packets_compare_notifier.notify = colo_compare_notify_checkpoint;
67
+ colo_compare_register_notifier(&packets_compare_notifier);
68
+
69
/*
70
* Wait for Secondary finish loading VM states and enter COLO
71
* restore.
72
@@ -XXX,XX +XXX,XX @@ out:
73
qemu_fclose(fb);
74
}
75
76
- timer_del(s->colo_delay_timer);
77
-
78
/* Hope this not to be too long to wait here */
79
qemu_sem_wait(&s->colo_exit_sem);
80
qemu_sem_destroy(&s->colo_exit_sem);
81
+
82
+ /*
83
+ * It is safe to unregister notifier after failover finished.
84
+ * Besides, colo_delay_timer and colo_checkpoint_sem can't be
85
+ * released befor unregister notifier, or there will be use-after-free
86
+ * error.
87
+ */
88
+ colo_compare_unregister_notifier(&packets_compare_notifier);
89
+ timer_del(s->colo_delay_timer);
90
+ timer_free(s->colo_delay_timer);
91
+ qemu_sem_destroy(&s->colo_checkpoint_sem);
92
+
93
/*
94
* Must be called after failover BH is completed,
95
* Or the failover BH may shutdown the wrong fd that
96
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
97
fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
98
object_unref(OBJECT(bioc));
99
100
+ qemu_mutex_lock_iothread();
101
+ vm_start();
102
+ trace_colo_vm_state_change("stop", "run");
103
+ qemu_mutex_unlock_iothread();
104
+
105
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
106
&local_err);
107
if (local_err) {
108
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
109
goto out;
110
}
111
112
+ qemu_mutex_lock_iothread();
113
+ vm_stop_force_state(RUN_STATE_COLO);
114
+ trace_colo_vm_state_change("run", "stop");
115
+ qemu_mutex_unlock_iothread();
116
+
117
/* FIXME: This is unnecessary for periodic checkpoint mode */
118
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
119
&local_err);
120
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
121
}
122
123
vmstate_loading = false;
124
+ vm_start();
125
+ trace_colo_vm_state_change("stop", "run");
126
qemu_mutex_unlock_iothread();
127
128
if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
129
diff --git a/migration/migration.c b/migration/migration.c
130
index XXXXXXX..XXXXXXX 100644
131
--- a/migration/migration.c
132
+++ b/migration/migration.c
133
@@ -XXX,XX +XXX,XX @@
134
/* Migration XBZRLE default cache size */
135
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)
136
137
-/* The delay time (in ms) between two COLO checkpoints
138
- * Note: Please change this default value to 10000 when we support hybrid mode.
139
- */
140
-#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
141
+/* The delay time (in ms) between two COLO checkpoints */
142
+#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
143
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
144
#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16
145
146
--
147
2.5.0
148
149
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
Make sure the master starts block replication after the slave's block
4
replication started.
5
6
Besides, we need to activate the VM's blocks before going into
7
COLO state.
8
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
11
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
12
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
migration/colo.c | 43 +++++++++++++++++++++++++++++++++++++++++++
16
migration/migration.c | 10 ++++++++++
17
2 files changed, 53 insertions(+)
18
19
diff --git a/migration/colo.c b/migration/colo.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/migration/colo.c
22
+++ b/migration/colo.c
23
@@ -XXX,XX +XXX,XX @@
24
#include "replication.h"
25
#include "net/colo-compare.h"
26
#include "net/colo.h"
27
+#include "block/block.h"
28
29
static bool vmstate_loading;
30
static Notifier packets_compare_notifier;
31
@@ -XXX,XX +XXX,XX @@ static void secondary_vm_do_failover(void)
32
{
33
int old_state;
34
MigrationIncomingState *mis = migration_incoming_get_current();
35
+ Error *local_err = NULL;
36
37
/* Can not do failover during the process of VM's loading VMstate, Or
38
* it will break the secondary VM.
39
@@ -XXX,XX +XXX,XX @@ static void secondary_vm_do_failover(void)
40
migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
41
MIGRATION_STATUS_COMPLETED);
42
43
+ replication_stop_all(true, &local_err);
44
+ if (local_err) {
45
+ error_report_err(local_err);
46
+ }
47
+
48
if (!autostart) {
49
error_report("\"-S\" qemu option will be ignored in secondary side");
50
/* recover runstate to normal migration finish state */
51
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
52
{
53
MigrationState *s = migrate_get_current();
54
int old_state;
55
+ Error *local_err = NULL;
56
57
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
58
MIGRATION_STATUS_COMPLETED);
59
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
60
FailoverStatus_str(old_state));
61
return;
62
}
63
+
64
+ replication_stop_all(true, &local_err);
65
+ if (local_err) {
66
+ error_report_err(local_err);
67
+ local_err = NULL;
68
+ }
69
+
70
/* Notify COLO thread that failover work is finished */
71
qemu_sem_post(&s->colo_exit_sem);
72
}
73
@@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s,
74
qemu_savevm_state_header(fb);
75
qemu_savevm_state_setup(fb);
76
qemu_mutex_lock_iothread();
77
+ replication_do_checkpoint_all(&local_err);
78
+ if (local_err) {
79
+ qemu_mutex_unlock_iothread();
80
+ goto out;
81
+ }
82
qemu_savevm_state_complete_precopy(fb, false, false);
83
qemu_mutex_unlock_iothread();
84
85
@@ -XXX,XX +XXX,XX @@ static void colo_process_checkpoint(MigrationState *s)
86
object_unref(OBJECT(bioc));
87
88
qemu_mutex_lock_iothread();
89
+ replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
90
+ if (local_err) {
91
+ qemu_mutex_unlock_iothread();
92
+ goto out;
93
+ }
94
+
95
vm_start();
96
qemu_mutex_unlock_iothread();
97
trace_colo_vm_state_change("stop", "run");
98
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
99
object_unref(OBJECT(bioc));
100
101
qemu_mutex_lock_iothread();
102
+ replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
103
+ if (local_err) {
104
+ qemu_mutex_unlock_iothread();
105
+ goto out;
106
+ }
107
vm_start();
108
trace_colo_vm_state_change("stop", "run");
109
qemu_mutex_unlock_iothread();
110
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
111
goto out;
112
}
113
114
+ replication_get_error_all(&local_err);
115
+ if (local_err) {
116
+ qemu_mutex_unlock_iothread();
117
+ goto out;
118
+ }
119
+ /* discard colo disk buffer */
120
+ replication_do_checkpoint_all(&local_err);
121
+ if (local_err) {
122
+ qemu_mutex_unlock_iothread();
123
+ goto out;
124
+ }
125
+
126
vmstate_loading = false;
127
vm_start();
128
trace_colo_vm_state_change("stop", "run");
129
diff --git a/migration/migration.c b/migration/migration.c
130
index XXXXXXX..XXXXXXX 100644
131
--- a/migration/migration.c
132
+++ b/migration/migration.c
133
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
134
MigrationIncomingState *mis = migration_incoming_get_current();
135
PostcopyState ps;
136
int ret;
137
+ Error *local_err = NULL;
138
139
assert(mis->from_src_file);
140
mis->migration_incoming_co = qemu_coroutine_self();
141
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
142
143
/* we get COLO info, and know if we are in COLO mode */
144
if (!ret && migration_incoming_enable_colo()) {
145
+ /* Make sure all file formats flush their mutable metadata */
146
+ bdrv_invalidate_cache_all(&local_err);
147
+ if (local_err) {
148
+ migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
149
+ MIGRATION_STATUS_FAILED);
150
+ error_report_err(local_err);
151
+ exit(EXIT_FAILURE);
152
+ }
153
+
154
qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
155
colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
156
mis->have_colo_incoming_thread = true;
157
--
158
2.5.0
159
160
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
We need to know if migration is going into COLO state for
4
the incoming side before starting normal migration.
5
6
Instead by using the VMStateDescription to send colo_state
7
from source side to destination side, we use MIG_CMD_ENABLE_COLO
8
to indicate whether COLO is enabled or not.
9
10
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
11
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
12
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
13
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
include/migration/colo.h | 5 ++--
17
migration/Makefile.objs | 2 +-
18
migration/colo-comm.c | 76 ------------------------------------------------
19
migration/colo.c | 13 ++++++++-
20
migration/migration.c | 23 ++++++++++++++-
21
migration/savevm.c | 17 +++++++++++
22
migration/savevm.h | 1 +
23
migration/trace-events | 1 +
24
vl.c | 2 --
25
9 files changed, 57 insertions(+), 83 deletions(-)
26
delete mode 100644 migration/colo-comm.c
27
28
diff --git a/include/migration/colo.h b/include/migration/colo.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/include/migration/colo.h
31
+++ b/include/migration/colo.h
32
@@ -XXX,XX +XXX,XX @@ void migrate_start_colo_process(MigrationState *s);
33
bool migration_in_colo_state(void);
34
35
/* loadvm */
36
-bool migration_incoming_enable_colo(void);
37
-void migration_incoming_exit_colo(void);
38
+void migration_incoming_enable_colo(void);
39
+void migration_incoming_disable_colo(void);
40
+bool migration_incoming_colo_enabled(void);
41
void *colo_process_incoming_thread(void *opaque);
42
bool migration_incoming_in_colo_state(void);
43
44
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
45
index XXXXXXX..XXXXXXX 100644
46
--- a/migration/Makefile.objs
47
+++ b/migration/Makefile.objs
48
@@ -XXX,XX +XXX,XX @@
49
common-obj-y += migration.o socket.o fd.o exec.o
50
common-obj-y += tls.o channel.o savevm.o
51
-common-obj-y += colo-comm.o colo.o colo-failover.o
52
+common-obj-y += colo.o colo-failover.o
53
common-obj-y += vmstate.o vmstate-types.o page_cache.o
54
common-obj-y += qemu-file.o global_state.o
55
common-obj-y += qemu-file-channel.o
56
diff --git a/migration/colo-comm.c b/migration/colo-comm.c
57
deleted file mode 100644
58
index XXXXXXX..XXXXXXX
59
--- a/migration/colo-comm.c
60
+++ /dev/null
61
@@ -XXX,XX +XXX,XX @@
62
-/*
63
- * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
64
- * (a.k.a. Fault Tolerance or Continuous Replication)
65
- *
66
- * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
67
- * Copyright (c) 2016 FUJITSU LIMITED
68
- * Copyright (c) 2016 Intel Corporation
69
- *
70
- * This work is licensed under the terms of the GNU GPL, version 2 or
71
- * later. See the COPYING file in the top-level directory.
72
- *
73
- */
74
-
75
-#include "qemu/osdep.h"
76
-#include "migration.h"
77
-#include "migration/colo.h"
78
-#include "migration/vmstate.h"
79
-#include "trace.h"
80
-
81
-typedef struct {
82
- bool colo_requested;
83
-} COLOInfo;
84
-
85
-static COLOInfo colo_info;
86
-
87
-COLOMode get_colo_mode(void)
88
-{
89
- if (migration_in_colo_state()) {
90
- return COLO_MODE_PRIMARY;
91
- } else if (migration_incoming_in_colo_state()) {
92
- return COLO_MODE_SECONDARY;
93
- } else {
94
- return COLO_MODE_UNKNOWN;
95
- }
96
-}
97
-
98
-static int colo_info_pre_save(void *opaque)
99
-{
100
- COLOInfo *s = opaque;
101
-
102
- s->colo_requested = migrate_colo_enabled();
103
-
104
- return 0;
105
-}
106
-
107
-static bool colo_info_need(void *opaque)
108
-{
109
- return migrate_colo_enabled();
110
-}
111
-
112
-static const VMStateDescription colo_state = {
113
- .name = "COLOState",
114
- .version_id = 1,
115
- .minimum_version_id = 1,
116
- .pre_save = colo_info_pre_save,
117
- .needed = colo_info_need,
118
- .fields = (VMStateField[]) {
119
- VMSTATE_BOOL(colo_requested, COLOInfo),
120
- VMSTATE_END_OF_LIST()
121
- },
122
-};
123
-
124
-void colo_info_init(void)
125
-{
126
- vmstate_register(NULL, 0, &colo_state, &colo_info);
127
-}
128
-
129
-bool migration_incoming_enable_colo(void)
130
-{
131
- return colo_info.colo_requested;
132
-}
133
-
134
-void migration_incoming_exit_colo(void)
135
-{
136
- colo_info.colo_requested = false;
137
-}
138
diff --git a/migration/colo.c b/migration/colo.c
139
index XXXXXXX..XXXXXXX 100644
140
--- a/migration/colo.c
141
+++ b/migration/colo.c
142
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
143
qemu_sem_post(&s->colo_exit_sem);
144
}
145
146
+COLOMode get_colo_mode(void)
147
+{
148
+ if (migration_in_colo_state()) {
149
+ return COLO_MODE_PRIMARY;
150
+ } else if (migration_incoming_in_colo_state()) {
151
+ return COLO_MODE_SECONDARY;
152
+ } else {
153
+ return COLO_MODE_UNKNOWN;
154
+ }
155
+}
156
+
157
void colo_do_failover(MigrationState *s)
158
{
159
/* Make sure VM stopped while failover happened. */
160
@@ -XXX,XX +XXX,XX @@ out:
161
if (mis->to_src_file) {
162
qemu_fclose(mis->to_src_file);
163
}
164
- migration_incoming_exit_colo();
165
+ migration_incoming_disable_colo();
166
167
rcu_unregister_thread();
168
return NULL;
169
diff --git a/migration/migration.c b/migration/migration.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/migration/migration.c
172
+++ b/migration/migration.c
173
@@ -XXX,XX +XXX,XX @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
174
return migrate_send_rp_message(mis, msg_type, msglen, bufc);
175
}
176
177
+static bool migration_colo_enabled;
178
+bool migration_incoming_colo_enabled(void)
179
+{
180
+ return migration_colo_enabled;
181
+}
182
+
183
+void migration_incoming_disable_colo(void)
184
+{
185
+ migration_colo_enabled = false;
186
+}
187
+
188
+void migration_incoming_enable_colo(void)
189
+{
190
+ migration_colo_enabled = true;
191
+}
192
+
193
void qemu_start_incoming_migration(const char *uri, Error **errp)
194
{
195
const char *p;
196
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
197
}
198
199
/* we get COLO info, and know if we are in COLO mode */
200
- if (!ret && migration_incoming_enable_colo()) {
201
+ if (!ret && migration_incoming_colo_enabled()) {
202
/* Make sure all file formats flush their mutable metadata */
203
bdrv_invalidate_cache_all(&local_err);
204
if (local_err) {
205
@@ -XXX,XX +XXX,XX @@ static void *migration_thread(void *opaque)
206
qemu_savevm_send_postcopy_advise(s->to_dst_file);
207
}
208
209
+ if (migrate_colo_enabled()) {
210
+ /* Notify migration destination that we enable COLO */
211
+ qemu_savevm_send_colo_enable(s->to_dst_file);
212
+ }
213
+
214
qemu_savevm_state_setup(s->to_dst_file);
215
216
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
217
diff --git a/migration/savevm.c b/migration/savevm.c
218
index XXXXXXX..XXXXXXX 100644
219
--- a/migration/savevm.c
220
+++ b/migration/savevm.c
221
@@ -XXX,XX +XXX,XX @@
222
#include "io/channel-file.h"
223
#include "sysemu/replay.h"
224
#include "qjson.h"
225
+#include "migration/colo.h"
226
227
#ifndef ETH_P_RARP
228
#define ETH_P_RARP 0x8035
229
@@ -XXX,XX +XXX,XX @@ enum qemu_vm_cmd {
230
were previously sent during
231
precopy but are dirty. */
232
MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */
233
+ MIG_CMD_ENABLE_COLO, /* Enable COLO */
234
MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */
235
MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */
236
MIG_CMD_MAX
237
@@ -XXX,XX +XXX,XX @@ static void qemu_savevm_command_send(QEMUFile *f,
238
qemu_fflush(f);
239
}
240
241
+void qemu_savevm_send_colo_enable(QEMUFile *f)
242
+{
243
+ trace_savevm_send_colo_enable();
244
+ qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
245
+}
246
+
247
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
248
{
249
uint32_t buf;
250
@@ -XXX,XX +XXX,XX @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
251
return 0;
252
}
253
254
+static int loadvm_process_enable_colo(MigrationIncomingState *mis)
255
+{
256
+ migration_incoming_enable_colo();
257
+ return 0;
258
+}
259
+
260
/*
261
* Process an incoming 'QEMU_VM_COMMAND'
262
* 0 just a normal return
263
@@ -XXX,XX +XXX,XX @@ static int loadvm_process_command(QEMUFile *f)
264
265
case MIG_CMD_RECV_BITMAP:
266
return loadvm_handle_recv_bitmap(mis, len);
267
+
268
+ case MIG_CMD_ENABLE_COLO:
269
+ return loadvm_process_enable_colo(mis);
270
}
271
272
return 0;
273
diff --git a/migration/savevm.h b/migration/savevm.h
274
index XXXXXXX..XXXXXXX 100644
275
--- a/migration/savevm.h
276
+++ b/migration/savevm.h
277
@@ -XXX,XX +XXX,XX @@ void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
278
uint16_t len,
279
uint64_t *start_list,
280
uint64_t *length_list);
281
+void qemu_savevm_send_colo_enable(QEMUFile *f);
282
283
int qemu_loadvm_state(QEMUFile *f);
284
void qemu_loadvm_state_cleanup(void);
285
diff --git a/migration/trace-events b/migration/trace-events
286
index XXXXXXX..XXXXXXX 100644
287
--- a/migration/trace-events
288
+++ b/migration/trace-events
289
@@ -XXX,XX +XXX,XX @@ savevm_send_ping(uint32_t val) "0x%x"
290
savevm_send_postcopy_listen(void) ""
291
savevm_send_postcopy_run(void) ""
292
savevm_send_postcopy_resume(void) ""
293
+savevm_send_colo_enable(void) ""
294
savevm_send_recv_bitmap(char *name) "%s"
295
savevm_state_setup(void) ""
296
savevm_state_resume_prepare(void) ""
297
diff --git a/vl.c b/vl.c
298
index XXXXXXX..XXXXXXX 100644
299
--- a/vl.c
300
+++ b/vl.c
301
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
302
#endif
303
}
304
305
- colo_info_init();
306
-
307
if (net_init_clients(&err) < 0) {
308
error_report_err(err);
309
exit(1);
310
--
311
2.5.0
312
313
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
We should not load PVM's state directly into SVM, because errors may
4
happen while SVM is receiving data, which would break SVM.
5
6
We need to ensure receving all data before load the state into SVM. We use
7
an extra memory to cache these data (PVM's ram). The ram cache in secondary side
8
is initially the same as SVM/PVM's memory. And in the process of checkpoint,
9
we cache the dirty pages of PVM into this ram cache firstly, so this ram cache
10
always the same as PVM's memory at every checkpoint, then we flush this cached ram
11
to SVM after we receive all PVM's state.
12
13
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
14
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
15
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
16
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
17
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
18
Signed-off-by: Jason Wang <jasowang@redhat.com>
19
---
20
include/exec/ram_addr.h | 1 +
21
migration/migration.c | 7 +++++
22
migration/ram.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++--
23
migration/ram.h | 4 +++
24
migration/savevm.c | 2 +-
25
5 files changed, 94 insertions(+), 3 deletions(-)
26
27
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/exec/ram_addr.h
30
+++ b/include/exec/ram_addr.h
31
@@ -XXX,XX +XXX,XX @@ struct RAMBlock {
32
struct rcu_head rcu;
33
struct MemoryRegion *mr;
34
uint8_t *host;
35
+ uint8_t *colo_cache; /* For colo, VM's ram cache */
36
ram_addr_t offset;
37
ram_addr_t used_length;
38
ram_addr_t max_length;
39
diff --git a/migration/migration.c b/migration/migration.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/migration/migration.c
42
+++ b/migration/migration.c
43
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
44
exit(EXIT_FAILURE);
45
}
46
47
+ if (colo_init_ram_cache() < 0) {
48
+ error_report("Init ram cache failed");
49
+ exit(EXIT_FAILURE);
50
+ }
51
+
52
qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
53
colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
54
mis->have_colo_incoming_thread = true;
55
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
56
57
/* Wait checkpoint incoming thread exit before free resource */
58
qemu_thread_join(&mis->colo_incoming_thread);
59
+ /* We hold the global iothread lock, so it is safe here */
60
+ colo_release_ram_cache();
61
}
62
63
if (ret < 0) {
64
diff --git a/migration/ram.c b/migration/ram.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/migration/ram.c
67
+++ b/migration/ram.c
68
@@ -XXX,XX +XXX,XX @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
69
return block->host + offset;
70
}
71
72
+static inline void *colo_cache_from_block_offset(RAMBlock *block,
73
+ ram_addr_t offset)
74
+{
75
+ if (!offset_in_ramblock(block, offset)) {
76
+ return NULL;
77
+ }
78
+ if (!block->colo_cache) {
79
+ error_report("%s: colo_cache is NULL in block :%s",
80
+ __func__, block->idstr);
81
+ return NULL;
82
+ }
83
+ return block->colo_cache + offset;
84
+}
85
+
86
/**
87
* ram_handle_compressed: handle the zero page case
88
*
89
@@ -XXX,XX +XXX,XX @@ static void decompress_data_with_multi_threads(QEMUFile *f,
90
qemu_mutex_unlock(&decomp_done_lock);
91
}
92
93
+/*
94
+ * colo cache: this is for secondary VM, we cache the whole
95
+ * memory of the secondary VM, it is need to hold the global lock
96
+ * to call this helper.
97
+ */
98
+int colo_init_ram_cache(void)
99
+{
100
+ RAMBlock *block;
101
+
102
+ rcu_read_lock();
103
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
104
+ block->colo_cache = qemu_anon_ram_alloc(block->used_length,
105
+ NULL,
106
+ false);
107
+ if (!block->colo_cache) {
108
+ error_report("%s: Can't alloc memory for COLO cache of block %s,"
109
+ "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
110
+ block->used_length);
111
+ goto out_locked;
112
+ }
113
+ memcpy(block->colo_cache, block->host, block->used_length);
114
+ }
115
+ rcu_read_unlock();
116
+ return 0;
117
+
118
+out_locked:
119
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
120
+ if (block->colo_cache) {
121
+ qemu_anon_ram_free(block->colo_cache, block->used_length);
122
+ block->colo_cache = NULL;
123
+ }
124
+ }
125
+
126
+ rcu_read_unlock();
127
+ return -errno;
128
+}
129
+
130
+/* It is need to hold the global lock to call this helper */
131
+void colo_release_ram_cache(void)
132
+{
133
+ RAMBlock *block;
134
+
135
+ rcu_read_lock();
136
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
137
+ if (block->colo_cache) {
138
+ qemu_anon_ram_free(block->colo_cache, block->used_length);
139
+ block->colo_cache = NULL;
140
+ }
141
+ }
142
+ rcu_read_unlock();
143
+}
144
+
145
/**
146
* ram_load_setup: Setup RAM for migration incoming side
147
*
148
@@ -XXX,XX +XXX,XX @@ static int ram_load_setup(QEMUFile *f, void *opaque)
149
150
xbzrle_load_setup();
151
ramblock_recv_map_init();
152
+
153
return 0;
154
}
155
156
@@ -XXX,XX +XXX,XX @@ static int ram_load_cleanup(void *opaque)
157
g_free(rb->receivedmap);
158
rb->receivedmap = NULL;
159
}
160
+
161
return 0;
162
}
163
164
@@ -XXX,XX +XXX,XX @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
165
RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
166
RAMBlock *block = ram_block_from_stream(f, flags);
167
168
- host = host_from_ram_block_offset(block, addr);
169
+ /*
170
+ * After going into COLO, we should load the Page into colo_cache.
171
+ */
172
+ if (migration_incoming_in_colo_state()) {
173
+ host = colo_cache_from_block_offset(block, addr);
174
+ } else {
175
+ host = host_from_ram_block_offset(block, addr);
176
+ }
177
if (!host) {
178
error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
179
ret = -EINVAL;
180
break;
181
}
182
- ramblock_recv_bitmap_set(block, host);
183
+
184
+ if (!migration_incoming_in_colo_state()) {
185
+ ramblock_recv_bitmap_set(block, host);
186
+ }
187
+
188
trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
189
}
190
191
diff --git a/migration/ram.h b/migration/ram.h
192
index XXXXXXX..XXXXXXX 100644
193
--- a/migration/ram.h
194
+++ b/migration/ram.h
195
@@ -XXX,XX +XXX,XX @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
196
const char *block_name);
197
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
198
199
+/* ram cache */
200
+int colo_init_ram_cache(void);
201
+void colo_release_ram_cache(void);
202
+
203
#endif
204
diff --git a/migration/savevm.c b/migration/savevm.c
205
index XXXXXXX..XXXXXXX 100644
206
--- a/migration/savevm.c
207
+++ b/migration/savevm.c
208
@@ -XXX,XX +XXX,XX @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
209
static int loadvm_process_enable_colo(MigrationIncomingState *mis)
210
{
211
migration_incoming_enable_colo();
212
- return 0;
213
+ return colo_init_ram_cache();
214
}
215
216
/*
217
--
218
2.5.0
219
220
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
We record the addresses of the dirty pages that were received;
4
this helps when flushing the cached pages into SVM.
5
6
Here, it is a trick, we record dirty pages by re-using migration
7
dirty bitmap. In the later patch, we will start the dirty log
8
for SVM, just like migration, in this way, we can record both
9
the dirty pages caused by PVM and SVM, we only flush those dirty
10
pages from the RAM cache while doing a checkpoint.
11
12
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
13
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
14
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
15
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
18
migration/ram.c | 43 ++++++++++++++++++++++++++++++++++++++++---
19
1 file changed, 40 insertions(+), 3 deletions(-)
20
21
diff --git a/migration/ram.c b/migration/ram.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/migration/ram.c
24
+++ b/migration/ram.c
25
@@ -XXX,XX +XXX,XX @@ static inline void *colo_cache_from_block_offset(RAMBlock *block,
26
__func__, block->idstr);
27
return NULL;
28
}
29
+
30
+ /*
31
+ * During colo checkpoint, we need bitmap of these migrated pages.
32
+ * It help us to decide which pages in ram cache should be flushed
33
+ * into VM's RAM later.
34
+ */
35
+ if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
36
+ ram_state->migration_dirty_pages++;
37
+ }
38
return block->colo_cache + offset;
39
}
40
41
@@ -XXX,XX +XXX,XX @@ int colo_init_ram_cache(void)
42
RAMBlock *block;
43
44
rcu_read_lock();
45
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
46
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
47
block->colo_cache = qemu_anon_ram_alloc(block->used_length,
48
NULL,
49
false);
50
@@ -XXX,XX +XXX,XX @@ int colo_init_ram_cache(void)
51
memcpy(block->colo_cache, block->host, block->used_length);
52
}
53
rcu_read_unlock();
54
+ /*
55
+ * Record the dirty pages that sent by PVM, we use this dirty bitmap together
56
+ * with to decide which page in cache should be flushed into SVM's RAM. Here
57
+ * we use the same name 'ram_bitmap' as for migration.
58
+ */
59
+ if (ram_bytes_total()) {
60
+ RAMBlock *block;
61
+
62
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
63
+ unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
64
+
65
+ block->bmap = bitmap_new(pages);
66
+ bitmap_set(block->bmap, 0, pages);
67
+ }
68
+ }
69
+ ram_state = g_new0(RAMState, 1);
70
+ ram_state->migration_dirty_pages = 0;
71
+
72
return 0;
73
74
out_locked:
75
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
76
+
77
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
78
if (block->colo_cache) {
79
qemu_anon_ram_free(block->colo_cache, block->used_length);
80
block->colo_cache = NULL;
81
@@ -XXX,XX +XXX,XX @@ void colo_release_ram_cache(void)
82
{
83
RAMBlock *block;
84
85
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
86
+ g_free(block->bmap);
87
+ block->bmap = NULL;
88
+ }
89
+
90
rcu_read_lock();
91
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
92
+
93
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
94
if (block->colo_cache) {
95
qemu_anon_ram_free(block->colo_cache, block->used_length);
96
block->colo_cache = NULL;
97
}
98
}
99
+
100
rcu_read_unlock();
101
+ g_free(ram_state);
102
+ ram_state = NULL;
103
}
104
105
/**
106
--
107
2.5.0
108
109
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
During the time of VM's running, PVM may dirty some pages, we will transfer
4
PVM's dirty pages to SVM and store them into SVM's RAM cache at next checkpoint
5
time. So, the content of SVM's RAM cache will always be same with PVM's memory
6
after checkpoint.
7
8
Instead of flushing all content of PVM's RAM cache into SVM's MEMORY,
9
we do this in a more efficient way:
10
Only flush pages that were dirtied by PVM since the last checkpoint.
11
In this way, we can ensure SVM's memory same with PVM's.
12
13
Besides, we must ensure the RAM cache is flushed before loading device state.
14
15
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
16
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
17
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
18
Signed-off-by: Jason Wang <jasowang@redhat.com>
19
---
20
migration/ram.c | 37 +++++++++++++++++++++++++++++++++++++
21
migration/trace-events | 2 ++
22
2 files changed, 39 insertions(+)
23
24
diff --git a/migration/ram.c b/migration/ram.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/migration/ram.c
27
+++ b/migration/ram.c
28
@@ -XXX,XX +XXX,XX @@ static bool postcopy_is_running(void)
29
return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
30
}
31
32
+/*
33
+ * Flush content of RAM cache into SVM's memory.
34
+ * Only flush the pages that be dirtied by PVM or SVM or both.
35
+ */
36
+static void colo_flush_ram_cache(void)
37
+{
38
+ RAMBlock *block = NULL;
39
+ void *dst_host;
40
+ void *src_host;
41
+ unsigned long offset = 0;
42
+
43
+ trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
44
+ rcu_read_lock();
45
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
46
+
47
+ while (block) {
48
+ offset = migration_bitmap_find_dirty(ram_state, block, offset);
49
+
50
+ if (offset << TARGET_PAGE_BITS >= block->used_length) {
51
+ offset = 0;
52
+ block = QLIST_NEXT_RCU(block, next);
53
+ } else {
54
+ migration_bitmap_clear_dirty(ram_state, block, offset);
55
+ dst_host = block->host + (offset << TARGET_PAGE_BITS);
56
+ src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
57
+ memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
58
+ }
59
+ }
60
+
61
+ rcu_read_unlock();
62
+ trace_colo_flush_ram_cache_end();
63
+}
64
+
65
static int ram_load(QEMUFile *f, void *opaque, int version_id)
66
{
67
int flags = 0, ret = 0, invalid_flags = 0;
68
@@ -XXX,XX +XXX,XX @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
69
ret |= wait_for_decompress_done();
70
rcu_read_unlock();
71
trace_ram_load_complete(ret, seq_iter);
72
+
73
+ if (!ret && migration_incoming_in_colo_state()) {
74
+ colo_flush_ram_cache();
75
+ }
76
return ret;
77
}
78
79
diff --git a/migration/trace-events b/migration/trace-events
80
index XXXXXXX..XXXXXXX 100644
81
--- a/migration/trace-events
82
+++ b/migration/trace-events
83
@@ -XXX,XX +XXX,XX @@ ram_dirty_bitmap_sync_start(void) ""
84
ram_dirty_bitmap_sync_wait(void) ""
85
ram_dirty_bitmap_sync_complete(void) ""
86
ram_state_resume_prepare(uint64_t v) "%" PRId64
87
+colo_flush_ram_cache_begin(uint64_t dirty_pages) "dirty_pages %" PRIu64
88
+colo_flush_ram_cache_end(void) ""
89
90
# migration/migration.c
91
await_return_path_close_on_source_close(void) ""
92
--
93
2.5.0
94
95
diff view generated by jsdifflib
Deleted patch
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
2
1
3
If some errors happen during VM's COLO FT stage, it's important to
4
notify the users of this event. Together with 'x-colo-lost-heartbeat',
5
Users can intervene in COLO's failover work immediately.
6
If users don't want to get involved in COLO's failover verdict,
7
it is still necessary to notify users that we exited COLO mode.
8
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
11
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
12
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
migration/colo.c | 31 +++++++++++++++++++++++++++++++
16
qapi/migration.json | 38 ++++++++++++++++++++++++++++++++++++++
17
2 files changed, 69 insertions(+)
18
19
diff --git a/migration/colo.c b/migration/colo.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/migration/colo.c
22
+++ b/migration/colo.c
23
@@ -XXX,XX +XXX,XX @@
24
#include "net/colo-compare.h"
25
#include "net/colo.h"
26
#include "block/block.h"
27
+#include "qapi/qapi-events-migration.h"
28
29
static bool vmstate_loading;
30
static Notifier packets_compare_notifier;
31
@@ -XXX,XX +XXX,XX @@ out:
32
qemu_fclose(fb);
33
}
34
35
+ /*
36
+ * There are only two reasons we can get here, some error happened
37
+ * or the user triggered failover.
38
+ */
39
+ switch (failover_get_state()) {
40
+ case FAILOVER_STATUS_NONE:
41
+ qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
42
+ COLO_EXIT_REASON_ERROR);
43
+ break;
44
+ case FAILOVER_STATUS_REQUIRE:
45
+ qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
46
+ COLO_EXIT_REASON_REQUEST);
47
+ break;
48
+ default:
49
+ abort();
50
+ }
51
+
52
/* Hope this not to be too long to wait here */
53
qemu_sem_wait(&s->colo_exit_sem);
54
qemu_sem_destroy(&s->colo_exit_sem);
55
@@ -XXX,XX +XXX,XX @@ out:
56
error_report_err(local_err);
57
}
58
59
+ switch (failover_get_state()) {
60
+ case FAILOVER_STATUS_NONE:
61
+ qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
62
+ COLO_EXIT_REASON_ERROR);
63
+ break;
64
+ case FAILOVER_STATUS_REQUIRE:
65
+ qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
66
+ COLO_EXIT_REASON_REQUEST);
67
+ break;
68
+ default:
69
+ abort();
70
+ }
71
+
72
if (fb) {
73
qemu_fclose(fb);
74
}
75
diff --git a/qapi/migration.json b/qapi/migration.json
76
index XXXXXXX..XXXXXXX 100644
77
--- a/qapi/migration.json
78
+++ b/qapi/migration.json
79
@@ -XXX,XX +XXX,XX @@
80
'data': [ 'none', 'require', 'active', 'completed', 'relaunch' ] }
81
82
##
83
+# @COLO_EXIT:
84
+#
85
+# Emitted when VM finishes COLO mode due to some errors happening or
86
+# at the request of users.
87
+#
88
+# @mode: report COLO mode when COLO exited.
89
+#
90
+# @reason: describes the reason for the COLO exit.
91
+#
92
+# Since: 3.1
93
+#
94
+# Example:
95
+#
96
+# <- { "timestamp": {"seconds": 2032141960, "microseconds": 417172},
97
+# "event": "COLO_EXIT", "data": {"mode": "primary", "reason": "request" } }
98
+#
99
+##
100
+{ 'event': 'COLO_EXIT',
101
+ 'data': {'mode': 'COLOMode', 'reason': 'COLOExitReason' } }
102
+
103
+##
104
+# @COLOExitReason:
105
+#
106
+# The reason for a COLO exit
107
+#
108
+# @none: no failover has ever happened. This can't occur in the
109
+# COLO_EXIT event, only in the result of query-colo-status.
110
+#
111
+# @request: COLO exit is due to an external request
112
+#
113
+# @error: COLO exit is due to an internal error
114
+#
115
+# Since: 3.1
116
+##
117
+{ 'enum': 'COLOExitReason',
118
+ 'data': [ 'none', 'request', 'error' ] }
119
+
120
+##
121
# @x-colo-lost-heartbeat:
122
#
123
# Tell qemu that heartbeat is lost, request it to do takeover procedures.
124
--
125
2.5.0
126
127
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <chen.zhang@intel.com>
2
1
3
As suggested by Markus Armbruster, rename the COLO "unknown" mode to "none".
4
5
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
6
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Markus Armbruster <armbru@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
migration/colo-failover.c | 2 +-
12
migration/colo.c | 2 +-
13
qapi/migration.json | 10 +++++-----
14
3 files changed, 7 insertions(+), 7 deletions(-)
15
16
diff --git a/migration/colo-failover.c b/migration/colo-failover.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/migration/colo-failover.c
19
+++ b/migration/colo-failover.c
20
@@ -XXX,XX +XXX,XX @@ FailoverStatus failover_get_state(void)
21
22
void qmp_x_colo_lost_heartbeat(Error **errp)
23
{
24
- if (get_colo_mode() == COLO_MODE_UNKNOWN) {
25
+ if (get_colo_mode() == COLO_MODE_NONE) {
26
error_setg(errp, QERR_FEATURE_DISABLED, "colo");
27
return;
28
}
29
diff --git a/migration/colo.c b/migration/colo.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/migration/colo.c
32
+++ b/migration/colo.c
33
@@ -XXX,XX +XXX,XX @@ COLOMode get_colo_mode(void)
34
} else if (migration_incoming_in_colo_state()) {
35
return COLO_MODE_SECONDARY;
36
} else {
37
- return COLO_MODE_UNKNOWN;
38
+ return COLO_MODE_NONE;
39
}
40
}
41
42
diff --git a/qapi/migration.json b/qapi/migration.json
43
index XXXXXXX..XXXXXXX 100644
44
--- a/qapi/migration.json
45
+++ b/qapi/migration.json
46
@@ -XXX,XX +XXX,XX @@
47
##
48
# @COLOMode:
49
#
50
-# The colo mode
51
+# The COLO current mode.
52
#
53
-# @unknown: unknown mode
54
+# @none: COLO is disabled.
55
#
56
-# @primary: master side
57
+# @primary: COLO node in primary side.
58
#
59
-# @secondary: slave side
60
+# @secondary: COLO node in slave side.
61
#
62
# Since: 2.8
63
##
64
{ 'enum': 'COLOMode',
65
- 'data': [ 'unknown', 'primary', 'secondary'] }
66
+ 'data': [ 'none', 'primary', 'secondary'] }
67
68
##
69
# @FailoverStatus:
70
--
71
2.5.0
72
73
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
Libvirt or other high-level software can use this command to query COLO status.
4
You can test this command like this:
5
{'execute':'query-colo-status'}
6
7
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
migration/colo.c | 21 +++++++++++++++++++++
12
qapi/migration.json | 32 ++++++++++++++++++++++++++++++++
13
2 files changed, 53 insertions(+)
14
15
diff --git a/migration/colo.c b/migration/colo.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/migration/colo.c
18
+++ b/migration/colo.c
19
@@ -XXX,XX +XXX,XX @@
20
#include "net/colo.h"
21
#include "block/block.h"
22
#include "qapi/qapi-events-migration.h"
23
+#include "qapi/qmp/qerror.h"
24
25
static bool vmstate_loading;
26
static Notifier packets_compare_notifier;
27
@@ -XXX,XX +XXX,XX @@ void qmp_xen_colo_do_checkpoint(Error **errp)
28
#endif
29
}
30
31
+COLOStatus *qmp_query_colo_status(Error **errp)
32
+{
33
+ COLOStatus *s = g_new0(COLOStatus, 1);
34
+
35
+ s->mode = get_colo_mode();
36
+
37
+ switch (failover_get_state()) {
38
+ case FAILOVER_STATUS_NONE:
39
+ s->reason = COLO_EXIT_REASON_NONE;
40
+ break;
41
+ case FAILOVER_STATUS_REQUIRE:
42
+ s->reason = COLO_EXIT_REASON_REQUEST;
43
+ break;
44
+ default:
45
+ s->reason = COLO_EXIT_REASON_ERROR;
46
+ }
47
+
48
+ return s;
49
+}
50
+
51
static void colo_send_message(QEMUFile *f, COLOMessage msg,
52
Error **errp)
53
{
54
diff --git a/qapi/migration.json b/qapi/migration.json
55
index XXXXXXX..XXXXXXX 100644
56
--- a/qapi/migration.json
57
+++ b/qapi/migration.json
58
@@ -XXX,XX +XXX,XX @@
59
{ 'command': 'xen-colo-do-checkpoint' }
60
61
##
62
+# @COLOStatus:
63
+#
64
+# The result format for 'query-colo-status'.
65
+#
66
+# @mode: COLO running mode. If COLO is running, this field will return
67
+# 'primary' or 'secondary'.
68
+#
69
+# @reason: describes the reason for the COLO exit.
70
+#
71
+# Since: 3.0
72
+##
73
+{ 'struct': 'COLOStatus',
74
+ 'data': { 'mode': 'COLOMode', 'reason': 'COLOExitReason' } }
75
+
76
+##
77
+# @query-colo-status:
78
+#
79
+# Query COLO status while the vm is running.
80
+#
81
+# Returns: A @COLOStatus object showing the status.
82
+#
83
+# Example:
84
+#
85
+# -> { "execute": "query-colo-status" }
86
+# <- { "return": { "mode": "primary", "reason": "request" } }
87
+#
88
+# Since: 3.0
89
+##
90
+{ 'command': 'query-colo-status',
91
+ 'returns': 'COLOStatus' }
92
+
93
+##
94
# @migrate-recover:
95
#
96
# Provide a recovery migration stream URI.
97
--
98
2.5.0
99
100
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
There are several stages during the loadvm/savevm process. In different stages,
4
migration incoming processes different types of sections.
5
We want to control these stages more accurately; it will benefit COLO
6
performance, we don't have to save type of QEMU_VM_SECTION_START
7
sections everytime while do checkpoint, besides, we want to separate
8
the process of saving/loading memory and devices state.
9
10
So we add two new helper functions: qemu_load_device_state() and
11
qemu_savevm_live_state() to handle the different stages of migration.
12
13
Besides, we make qemu_loadvm_state_main() and qemu_save_device_state()
14
public, and simplify the codes of qemu_save_device_state() by calling the
15
wrapper qemu_savevm_state_header().
16
17
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
18
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
19
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
20
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
21
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
22
Signed-off-by: Jason Wang <jasowang@redhat.com>
23
---
24
migration/colo.c | 41 ++++++++++++++++++++++++++++++++---------
25
migration/savevm.c | 36 +++++++++++++++++++++++++++++-------
26
migration/savevm.h | 4 ++++
27
3 files changed, 65 insertions(+), 16 deletions(-)
28
29
diff --git a/migration/colo.c b/migration/colo.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/migration/colo.c
32
+++ b/migration/colo.c
33
@@ -XXX,XX +XXX,XX @@
34
#include "block/block.h"
35
#include "qapi/qapi-events-migration.h"
36
#include "qapi/qmp/qerror.h"
37
+#include "sysemu/cpus.h"
38
39
static bool vmstate_loading;
40
static Notifier packets_compare_notifier;
41
@@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s,
42
43
/* Disable block migration */
44
migrate_set_block_enabled(false, &local_err);
45
- qemu_savevm_state_header(fb);
46
- qemu_savevm_state_setup(fb);
47
qemu_mutex_lock_iothread();
48
replication_do_checkpoint_all(&local_err);
49
if (local_err) {
50
qemu_mutex_unlock_iothread();
51
goto out;
52
}
53
- qemu_savevm_state_complete_precopy(fb, false, false);
54
- qemu_mutex_unlock_iothread();
55
-
56
- qemu_fflush(fb);
57
58
colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
59
if (local_err) {
60
+ qemu_mutex_unlock_iothread();
61
+ goto out;
62
+ }
63
+ /* Note: device state is saved into buffer */
64
+ ret = qemu_save_device_state(fb);
65
+
66
+ qemu_mutex_unlock_iothread();
67
+ if (ret < 0) {
68
goto out;
69
}
70
/*
71
+ * Only save VM's live state, which not including device state.
72
+ * TODO: We may need a timeout mechanism to prevent COLO process
73
+ * to be blocked here.
74
+ */
75
+ qemu_savevm_live_state(s->to_dst_file);
76
+
77
+ qemu_fflush(fb);
78
+
79
+ /*
80
* We need the size of the VMstate data in Secondary side,
81
* With which we can decide how much data should be read.
82
*/
83
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
84
uint64_t total_size;
85
uint64_t value;
86
Error *local_err = NULL;
87
+ int ret;
88
89
rcu_register_thread();
90
qemu_sem_init(&mis->colo_incoming_sem, 0);
91
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
92
goto out;
93
}
94
95
+ qemu_mutex_lock_iothread();
96
+ cpu_synchronize_all_pre_loadvm();
97
+ ret = qemu_loadvm_state_main(mis->from_src_file, mis);
98
+ qemu_mutex_unlock_iothread();
99
+
100
+ if (ret < 0) {
101
+ error_report("Load VM's live state (ram) error");
102
+ goto out;
103
+ }
104
+
105
value = colo_receive_message_value(mis->from_src_file,
106
COLO_MESSAGE_VMSTATE_SIZE, &local_err);
107
if (local_err) {
108
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
109
}
110
111
qemu_mutex_lock_iothread();
112
- qemu_system_reset(SHUTDOWN_CAUSE_NONE);
113
vmstate_loading = true;
114
- if (qemu_loadvm_state(fb) < 0) {
115
- error_report("COLO: loadvm failed");
116
+ ret = qemu_load_device_state(fb);
117
+ if (ret < 0) {
118
+ error_report("COLO: load device state failed");
119
qemu_mutex_unlock_iothread();
120
goto out;
121
}
122
diff --git a/migration/savevm.c b/migration/savevm.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/migration/savevm.c
125
+++ b/migration/savevm.c
126
@@ -XXX,XX +XXX,XX @@ done:
127
return ret;
128
}
129
130
-static int qemu_save_device_state(QEMUFile *f)
131
+void qemu_savevm_live_state(QEMUFile *f)
132
{
133
- SaveStateEntry *se;
134
+ /* save QEMU_VM_SECTION_END section */
135
+ qemu_savevm_state_complete_precopy(f, true, false);
136
+ qemu_put_byte(f, QEMU_VM_EOF);
137
+}
138
139
- qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
140
- qemu_put_be32(f, QEMU_VM_FILE_VERSION);
141
+int qemu_save_device_state(QEMUFile *f)
142
+{
143
+ SaveStateEntry *se;
144
145
+ if (!migration_in_colo_state()) {
146
+ qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
147
+ qemu_put_be32(f, QEMU_VM_FILE_VERSION);
148
+ }
149
cpu_synchronize_all_states();
150
151
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
152
@@ -XXX,XX +XXX,XX @@ enum LoadVMExitCodes {
153
LOADVM_QUIT = 1,
154
};
155
156
-static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
157
-
158
/* ------ incoming postcopy messages ------ */
159
/* 'advise' arrives before any transfers just to tell us that a postcopy
160
* *might* happen - it might be skipped if precopy transferred everything
161
@@ -XXX,XX +XXX,XX @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
162
return true;
163
}
164
165
-static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
166
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
167
{
168
uint8_t section_type;
169
int ret = 0;
170
@@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f)
171
return ret;
172
}
173
174
+int qemu_load_device_state(QEMUFile *f)
175
+{
176
+ MigrationIncomingState *mis = migration_incoming_get_current();
177
+ int ret;
178
+
179
+ /* Load QEMU_VM_SECTION_FULL section */
180
+ ret = qemu_loadvm_state_main(f, mis);
181
+ if (ret < 0) {
182
+ error_report("Failed to load device state: %d", ret);
183
+ return ret;
184
+ }
185
+
186
+ cpu_synchronize_all_post_init();
187
+ return 0;
188
+}
189
+
190
int save_snapshot(const char *name, Error **errp)
191
{
192
BlockDriverState *bs, *bs1;
193
diff --git a/migration/savevm.h b/migration/savevm.h
194
index XXXXXXX..XXXXXXX 100644
195
--- a/migration/savevm.h
196
+++ b/migration/savevm.h
197
@@ -XXX,XX +XXX,XX @@ void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
198
uint64_t *start_list,
199
uint64_t *length_list);
200
void qemu_savevm_send_colo_enable(QEMUFile *f);
201
+void qemu_savevm_live_state(QEMUFile *f);
202
+int qemu_save_device_state(QEMUFile *f);
203
204
int qemu_loadvm_state(QEMUFile *f);
205
void qemu_loadvm_state_cleanup(void);
206
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
207
+int qemu_load_device_state(QEMUFile *f);
208
209
#endif
210
--
211
2.5.0
212
213
diff view generated by jsdifflib
Deleted patch
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
2
1
3
There is no need to flush all of the VM's RAM from the cache; only
4
flush the pages dirtied since the last checkpoint.
5
6
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
7
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
migration/ram.c | 9 +++++++++
14
1 file changed, 9 insertions(+)
15
16
diff --git a/migration/ram.c b/migration/ram.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/migration/ram.c
19
+++ b/migration/ram.c
20
@@ -XXX,XX +XXX,XX @@ int colo_init_ram_cache(void)
21
}
22
ram_state = g_new0(RAMState, 1);
23
ram_state->migration_dirty_pages = 0;
24
+ memory_global_dirty_log_start();
25
26
return 0;
27
28
@@ -XXX,XX +XXX,XX @@ void colo_release_ram_cache(void)
29
{
30
RAMBlock *block;
31
32
+ memory_global_dirty_log_stop();
33
RAMBLOCK_FOREACH_MIGRATABLE(block) {
34
g_free(block->bmap);
35
block->bmap = NULL;
36
@@ -XXX,XX +XXX,XX @@ static void colo_flush_ram_cache(void)
37
void *src_host;
38
unsigned long offset = 0;
39
40
+ memory_global_dirty_log_sync();
41
+ rcu_read_lock();
42
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
43
+ migration_bitmap_sync_range(ram_state, block, 0, block->used_length);
44
+ }
45
+ rcu_read_unlock();
46
+
47
trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
48
rcu_read_lock();
49
block = QLIST_FIRST_RCU(&ram_list.blocks);
50
--
51
2.5.0
52
53
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
Filters need to process checkpoint/failover events or
4
other events passed by the COLO framework.
5
6
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
7
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
include/net/filter.h | 5 +++++
12
net/filter.c | 17 +++++++++++++++++
13
net/net.c | 19 +++++++++++++++++++
14
3 files changed, 41 insertions(+)
15
16
diff --git a/include/net/filter.h b/include/net/filter.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/include/net/filter.h
19
+++ b/include/net/filter.h
20
@@ -XXX,XX +XXX,XX @@ typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc,
21
22
typedef void (FilterStatusChanged) (NetFilterState *nf, Error **errp);
23
24
+typedef void (FilterHandleEvent) (NetFilterState *nf, int event, Error **errp);
25
+
26
typedef struct NetFilterClass {
27
ObjectClass parent_class;
28
29
@@ -XXX,XX +XXX,XX @@ typedef struct NetFilterClass {
30
FilterSetup *setup;
31
FilterCleanup *cleanup;
32
FilterStatusChanged *status_changed;
33
+ FilterHandleEvent *handle_event;
34
/* mandatory */
35
FilterReceiveIOV *receive_iov;
36
} NetFilterClass;
37
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
38
int iovcnt,
39
void *opaque);
40
41
+void colo_notify_filters_event(int event, Error **errp);
42
+
43
#endif /* QEMU_NET_FILTER_H */
44
diff --git a/net/filter.c b/net/filter.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/net/filter.c
47
+++ b/net/filter.c
48
@@ -XXX,XX +XXX,XX @@
49
#include "net/vhost_net.h"
50
#include "qom/object_interfaces.h"
51
#include "qemu/iov.h"
52
+#include "net/colo.h"
53
+#include "migration/colo.h"
54
55
static inline bool qemu_can_skip_netfilter(NetFilterState *nf)
56
{
57
@@ -XXX,XX +XXX,XX @@ static void netfilter_finalize(Object *obj)
58
g_free(nf->netdev_id);
59
}
60
61
+static void default_handle_event(NetFilterState *nf, int event, Error **errp)
62
+{
63
+ switch (event) {
64
+ case COLO_EVENT_CHECKPOINT:
65
+ break;
66
+ case COLO_EVENT_FAILOVER:
67
+ object_property_set_str(OBJECT(nf), "off", "status", errp);
68
+ break;
69
+ default:
70
+ break;
71
+ }
72
+}
73
+
74
static void netfilter_class_init(ObjectClass *oc, void *data)
75
{
76
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
77
+ NetFilterClass *nfc = NETFILTER_CLASS(oc);
78
79
ucc->complete = netfilter_complete;
80
+ nfc->handle_event = default_handle_event;
81
}
82
83
static const TypeInfo netfilter_info = {
84
diff --git a/net/net.c b/net/net.c
85
index XXXXXXX..XXXXXXX 100644
86
--- a/net/net.c
87
+++ b/net/net.c
88
@@ -XXX,XX +XXX,XX @@ void hmp_info_network(Monitor *mon, const QDict *qdict)
89
}
90
}
91
92
+void colo_notify_filters_event(int event, Error **errp)
93
+{
94
+ NetClientState *nc;
95
+ NetFilterState *nf;
96
+ NetFilterClass *nfc = NULL;
97
+ Error *local_err = NULL;
98
+
99
+ QTAILQ_FOREACH(nc, &net_clients, next) {
100
+ QTAILQ_FOREACH(nf, &nc->filters, next) {
101
+ nfc = NETFILTER_GET_CLASS(OBJECT(nf));
102
+ nfc->handle_event(nf, event, &local_err);
103
+ if (local_err) {
104
+ error_propagate(errp, local_err);
105
+ return;
106
+ }
107
+ }
108
+ }
109
+}
110
+
111
void qmp_set_link(const char *name, bool up, Error **errp)
112
{
113
NetClientState *ncs[MAX_QUEUE_NUM];
114
--
115
2.5.0
116
117
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
After one round of checkpoint, the states between PVM and SVM
4
become consistent, so it is unnecessary to adjust the sequence
5
of net packets for old connections, besides, while failover
6
happens, filter-rewriter will enter failover mode, in which it needn't
7
handle new TCP connections.
8
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
11
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
net/colo-compare.c | 12 +++++------
15
net/colo.c | 8 ++++++++
16
net/colo.h | 2 ++
17
net/filter-rewriter.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++
18
4 files changed, 73 insertions(+), 6 deletions(-)
19
20
diff --git a/net/colo-compare.c b/net/colo-compare.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/net/colo-compare.c
23
+++ b/net/colo-compare.c
24
@@ -XXX,XX +XXX,XX @@ enum {
25
SECONDARY_IN,
26
};
27
28
+static void colo_compare_inconsistency_notify(void)
29
+{
30
+ notifier_list_notify(&colo_compare_notifiers,
31
+ migrate_get_current());
32
+}
33
+
34
static int compare_chr_send(CompareState *s,
35
const uint8_t *buf,
36
uint32_t size,
37
@@ -XXX,XX +XXX,XX @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
38
return false;
39
}
40
41
-static void colo_compare_inconsistency_notify(void)
42
-{
43
- notifier_list_notify(&colo_compare_notifiers,
44
- migrate_get_current());
45
-}
46
-
47
static void colo_compare_tcp(CompareState *s, Connection *conn)
48
{
49
Packet *ppkt = NULL, *spkt = NULL;
50
diff --git a/net/colo.c b/net/colo.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/net/colo.c
53
+++ b/net/colo.c
54
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
55
56
return conn;
57
}
58
+
59
+bool connection_has_tracked(GHashTable *connection_track_table,
60
+ ConnectionKey *key)
61
+{
62
+ Connection *conn = g_hash_table_lookup(connection_track_table, key);
63
+
64
+ return conn ? true : false;
65
+}
66
diff --git a/net/colo.h b/net/colo.h
67
index XXXXXXX..XXXXXXX 100644
68
--- a/net/colo.h
69
+++ b/net/colo.h
70
@@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque);
71
Connection *connection_get(GHashTable *connection_track_table,
72
ConnectionKey *key,
73
GQueue *conn_list);
74
+bool connection_has_tracked(GHashTable *connection_track_table,
75
+ ConnectionKey *key);
76
void connection_hashtable_reset(GHashTable *connection_track_table);
77
Packet *packet_new(const void *data, int size, int vnet_hdr_len);
78
void packet_destroy(void *opaque, void *user_data);
79
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/net/filter-rewriter.c
82
+++ b/net/filter-rewriter.c
83
@@ -XXX,XX +XXX,XX @@
84
#include "qemu/main-loop.h"
85
#include "qemu/iov.h"
86
#include "net/checksum.h"
87
+#include "net/colo.h"
88
+#include "migration/colo.h"
89
90
#define FILTER_COLO_REWRITER(obj) \
91
OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
92
93
#define TYPE_FILTER_REWRITER "filter-rewriter"
94
+#define FAILOVER_MODE_ON true
95
+#define FAILOVER_MODE_OFF false
96
97
typedef struct RewriterState {
98
NetFilterState parent_obj;
99
@@ -XXX,XX +XXX,XX @@ typedef struct RewriterState {
100
/* hashtable to save connection */
101
GHashTable *connection_track_table;
102
bool vnet_hdr;
103
+ bool failover_mode;
104
} RewriterState;
105
106
+static void filter_rewriter_failover_mode(RewriterState *s)
107
+{
108
+ s->failover_mode = FAILOVER_MODE_ON;
109
+}
110
+
111
static void filter_rewriter_flush(NetFilterState *nf)
112
{
113
RewriterState *s = FILTER_COLO_REWRITER(nf);
114
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
115
*/
116
reverse_connection_key(&key);
117
}
118
+
119
+ /* After failover we needn't change new TCP packet */
120
+ if (s->failover_mode &&
121
+ !connection_has_tracked(s->connection_track_table, &key)) {
122
+ goto out;
123
+ }
124
+
125
conn = connection_get(s->connection_track_table,
126
&key,
127
NULL);
128
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
129
}
130
}
131
132
+out:
133
packet_destroy(pkt, NULL);
134
pkt = NULL;
135
return 0;
136
}
137
138
+static void reset_seq_offset(gpointer key, gpointer value, gpointer user_data)
139
+{
140
+ Connection *conn = (Connection *)value;
141
+
142
+ conn->offset = 0;
143
+}
144
+
145
+static gboolean offset_is_nonzero(gpointer key,
146
+ gpointer value,
147
+ gpointer user_data)
148
+{
149
+ Connection *conn = (Connection *)value;
150
+
151
+ return conn->offset ? true : false;
152
+}
153
+
154
+static void colo_rewriter_handle_event(NetFilterState *nf, int event,
155
+ Error **errp)
156
+{
157
+ RewriterState *rs = FILTER_COLO_REWRITER(nf);
158
+
159
+ switch (event) {
160
+ case COLO_EVENT_CHECKPOINT:
161
+ g_hash_table_foreach(rs->connection_track_table,
162
+ reset_seq_offset, NULL);
163
+ break;
164
+ case COLO_EVENT_FAILOVER:
165
+ if (!g_hash_table_find(rs->connection_track_table,
166
+ offset_is_nonzero, NULL)) {
167
+ filter_rewriter_failover_mode(rs);
168
+ }
169
+ break;
170
+ default:
171
+ break;
172
+ }
173
+}
174
+
175
static void colo_rewriter_cleanup(NetFilterState *nf)
176
{
177
RewriterState *s = FILTER_COLO_REWRITER(nf);
178
@@ -XXX,XX +XXX,XX @@ static void filter_rewriter_init(Object *obj)
179
RewriterState *s = FILTER_COLO_REWRITER(obj);
180
181
s->vnet_hdr = false;
182
+ s->failover_mode = FAILOVER_MODE_OFF;
183
object_property_add_bool(obj, "vnet_hdr_support",
184
filter_rewriter_get_vnet_hdr,
185
filter_rewriter_set_vnet_hdr, NULL);
186
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_class_init(ObjectClass *oc, void *data)
187
nfc->setup = colo_rewriter_setup;
188
nfc->cleanup = colo_rewriter_cleanup;
189
nfc->receive_iov = colo_rewriter_receive_iov;
190
+ nfc->handle_event = colo_rewriter_handle_event;
191
}
192
193
static const TypeInfo colo_rewriter_info = {
194
--
195
2.5.0
196
197
diff view generated by jsdifflib
Deleted patch
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
2
1
3
Notify all net filters about the checkpoint and failover event.
4
5
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
6
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
migration/colo.c | 15 +++++++++++++++
10
1 file changed, 15 insertions(+)
11
12
diff --git a/migration/colo.c b/migration/colo.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/migration/colo.c
15
+++ b/migration/colo.c
16
@@ -XXX,XX +XXX,XX @@
17
#include "qapi/qapi-events-migration.h"
18
#include "qapi/qmp/qerror.h"
19
#include "sysemu/cpus.h"
20
+#include "net/filter.h"
21
22
static bool vmstate_loading;
23
static Notifier packets_compare_notifier;
24
@@ -XXX,XX +XXX,XX @@ static void secondary_vm_do_failover(void)
25
error_report_err(local_err);
26
}
27
28
+ /* Notify all filters of all NIC to do failover */
29
+ colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
30
+ if (local_err) {
31
+ error_report_err(local_err);
32
+ }
33
+
34
if (!autostart) {
35
error_report("\"-S\" qemu option will be ignored in secondary side");
36
/* recover runstate to normal migration finish state */
37
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
38
goto out;
39
}
40
41
+ /* Notify all filters of all NIC to do checkpoint */
42
+ colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
43
+
44
+ if (local_err) {
45
+ qemu_mutex_unlock_iothread();
46
+ goto out;
47
+ }
48
+
49
vmstate_loading = false;
50
vm_start();
51
trace_colo_vm_state_change("stop", "run");
52
--
53
2.5.0
54
55
diff view generated by jsdifflib
Deleted patch
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
2
1
3
COLO thread may sleep at qemu_sem_wait(&s->colo_checkpoint_sem),
4
while failover work begins. It's better to wake it up to speed up
5
the process.
6
7
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
8
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
migration/colo.c | 8 ++++++++
12
1 file changed, 8 insertions(+)
13
14
diff --git a/migration/colo.c b/migration/colo.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/migration/colo.c
17
+++ b/migration/colo.c
18
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
19
20
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
21
MIGRATION_STATUS_COMPLETED);
22
+ /*
23
+ * kick COLO thread which might wait at
24
+ * qemu_sem_wait(&s->colo_checkpoint_sem).
25
+ */
26
+ colo_checkpoint_notify(migrate_get_current());
27
28
/*
29
* Wake up COLO thread which may blocked in recv() or send(),
30
@@ -XXX,XX +XXX,XX @@ static void colo_process_checkpoint(MigrationState *s)
31
32
qemu_sem_wait(&s->colo_checkpoint_sem);
33
34
+ if (s->state != MIGRATION_STATUS_COLO) {
35
+ goto out;
36
+ }
37
ret = colo_do_checkpoint_transaction(s, bioc, fb);
38
if (ret < 0) {
39
goto out;
40
--
41
2.5.0
42
43
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <chen.zhang@intel.com>
2
1
3
This diagram helps users better understand COLO.
4
Suggested by Markus Armbruster.
5
6
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
7
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
docs/COLO-FT.txt | 34 ++++++++++++++++++++++++++++++++++
11
1 file changed, 34 insertions(+)
12
13
diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt
14
index XXXXXXX..XXXXXXX 100644
15
--- a/docs/COLO-FT.txt
16
+++ b/docs/COLO-FT.txt
17
@@ -XXX,XX +XXX,XX @@ Note:
18
HeartBeat has not been implemented yet, so you need to trigger failover process
19
by using 'x-colo-lost-heartbeat' command.
20
21
+== COLO operation status ==
22
+
23
++-----------------+
24
+| |
25
+| Start COLO |
26
+| |
27
++--------+--------+
28
+ |
29
+ | Main qmp command:
30
+ | migrate-set-capabilities with x-colo
31
+ | migrate
32
+ |
33
+ v
34
++--------+--------+
35
+| |
36
+| COLO running |
37
+| |
38
++--------+--------+
39
+ |
40
+ | Main qmp command:
41
+ | x-colo-lost-heartbeat
42
+ | or
43
+ | some error happened
44
+ v
45
++--------+--------+
46
+| | send qmp event:
47
+| COLO failover | COLO_EXIT
48
+| |
49
++-----------------+
50
+
51
+COLO uses QMP commands to switch and report operation status.
52
+The diagram shows only the main QMP commands; you can find the details
53
+in test procedure.
54
+
55
== Test procedure ==
56
1. Startup qemu
57
Primary:
58
--
59
2.5.0
60
61
diff view generated by jsdifflib
Deleted patch
1
From: liujunjie <liujunjie23@huawei.com>
2
1
3
Previously, we did not clear callbacks such as handle_output when deleting
4
the virtqueue, which may result in a segmentation fault.
5
The scenario is as follows:
6
1. Start a vm with multiqueue vhost-net,
7
2. then we write VIRTIO_PCI_GUEST_FEATURES in PCI configuration to
8
trigger multiqueue disable in this VM, which will delete the virtqueue.
9
In this step, the tx_bh is deleted but the callback virtio_net_handle_tx_bh
10
still exists.
11
3. Finally, we write VIRTIO_PCI_QUEUE_NOTIFY in PCI configuration to
12
notify the deleted virtqueue. In this way, virtio_net_handle_tx_bh
13
will be called and QEMU will crash.
14
15
Although the scenario described above is uncommon, we had better guard against it.
16
17
CC: qemu-stable@nongnu.org
18
Signed-off-by: liujunjie <liujunjie23@huawei.com>
19
Signed-off-by: Jason Wang <jasowang@redhat.com>
20
---
21
hw/virtio/virtio.c | 2 ++
22
1 file changed, 2 insertions(+)
23
24
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/virtio/virtio.c
27
+++ b/hw/virtio/virtio.c
28
@@ -XXX,XX +XXX,XX @@ void virtio_del_queue(VirtIODevice *vdev, int n)
29
30
vdev->vq[n].vring.num = 0;
31
vdev->vq[n].vring.num_default = 0;
32
+ vdev->vq[n].handle_output = NULL;
33
+ vdev->vq[n].handle_aio_output = NULL;
34
}
35
36
static void virtio_set_isr(VirtIODevice *vdev, int value)
37
--
38
2.5.0
39
40
diff view generated by jsdifflib
Deleted patch
1
In ne2000_receive(), we try to assign size_ to size which converts
2
from size_t to integer. This will cause trouble when size_ is greater than
3
INT_MAX; this leads to a negative value in size, which can then pass
4
the check of size < MIN_BUF_SIZE, which may lead to out-of-bounds access
5
for both buf and buf1.
6
1
7
Fixing by converting the type of size to size_t.
8
9
CC: qemu-stable@nongnu.org
10
Reported-by: Daniel Shapira <daniel@twistlock.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/ne2000.c | 4 ++--
15
1 file changed, 2 insertions(+), 2 deletions(-)
16
17
diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/ne2000.c
20
+++ b/hw/net/ne2000.c
21
@@ -XXX,XX +XXX,XX @@ static int ne2000_buffer_full(NE2000State *s)
22
ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
23
{
24
NE2000State *s = qemu_get_nic_opaque(nc);
25
- int size = size_;
26
+ size_t size = size_;
27
uint8_t *p;
28
unsigned int total_len, next, avail, len, index, mcast_idx;
29
uint8_t buf1[60];
30
@@ -XXX,XX +XXX,XX @@ ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
31
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
32
33
#if defined(DEBUG_NE2000)
34
- printf("NE2000: received len=%d\n", size);
35
+ printf("NE2000: received len=%zu\n", size);
36
#endif
37
38
if (s->cmd & E8390_STOP || ne2000_buffer_full(s))
39
--
40
2.5.0
41
42
diff view generated by jsdifflib
Deleted patch
1
In rtl8139_do_receive(), we try to assign size_ to size which converts
2
from size_t to integer. This will cause trouble when size_ is greater than
3
INT_MAX; this leads to a negative value in size, which can then pass
4
the check of size < MIN_BUF_SIZE, which may lead to out-of-bounds access
5
for both buf and buf1.
6
1
7
Fixing by converting the type of size to size_t.
8
9
CC: qemu-stable@nongnu.org
10
Reported-by: Daniel Shapira <daniel@twistlock.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/rtl8139.c | 8 ++++----
15
1 file changed, 4 insertions(+), 4 deletions(-)
16
17
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/rtl8139.c
20
+++ b/hw/net/rtl8139.c
21
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
22
RTL8139State *s = qemu_get_nic_opaque(nc);
23
PCIDevice *d = PCI_DEVICE(s);
24
/* size is the length of the buffer passed to the driver */
25
- int size = size_;
26
+ size_t size = size_;
27
const uint8_t *dot1q_buf = NULL;
28
29
uint32_t packet_header = 0;
30
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
31
static const uint8_t broadcast_macaddr[6] =
32
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
33
34
- DPRINTF(">>> received len=%d\n", size);
35
+ DPRINTF(">>> received len=%zu\n", size);
36
37
/* test if board clock is stopped */
38
if (!s->clock_enabled)
39
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
40
41
if (size+4 > rx_space)
42
{
43
- DPRINTF("C+ Rx mode : descriptor %d size %d received %d + 4\n",
44
+ DPRINTF("C+ Rx mode : descriptor %d size %d received %zu + 4\n",
45
descriptor, rx_space, size);
46
47
s->IntrStatus |= RxOverflow;
48
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
49
if (avail != 0 && RX_ALIGN(size + 8) >= avail)
50
{
51
DPRINTF("rx overflow: rx buffer length %d head 0x%04x "
52
- "read 0x%04x === available 0x%04x need 0x%04x\n",
53
+ "read 0x%04x === available 0x%04x need 0x%04zx\n",
54
s->RxBufferSize, s->RxBufAddr, s->RxBufPtr, avail, size + 8);
55
56
s->IntrStatus |= RxOverflow;
57
--
58
2.5.0
59
60
diff view generated by jsdifflib
Deleted patch
1
In pcnet_receive(), we try to assign size_ to size which converts from
2
size_t to integer. This will cause trouble when size_ is greater than
3
INT_MAX; this leads to a negative value in size, which can then pass
4
the check of size < MIN_BUF_SIZE, which may lead to out-of-bounds access
5
for both buf and buf1.
6
1
7
Fixing by converting the type of size to size_t.
8
9
CC: qemu-stable@nongnu.org
10
Reported-by: Daniel Shapira <daniel@twistlock.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/pcnet.c | 4 ++--
15
1 file changed, 2 insertions(+), 2 deletions(-)
16
17
diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/pcnet.c
20
+++ b/hw/net/pcnet.c
21
@@ -XXX,XX +XXX,XX @@ ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
22
uint8_t buf1[60];
23
int remaining;
24
int crc_err = 0;
25
- int size = size_;
26
+ size_t size = size_;
27
28
if (CSR_DRX(s) || CSR_STOP(s) || CSR_SPND(s) || !size ||
29
(CSR_LOOP(s) && !s->looptest)) {
30
return -1;
31
}
32
#ifdef PCNET_DEBUG
33
- printf("pcnet_receive size=%d\n", size);
34
+ printf("pcnet_receive size=%zu\n", size);
35
#endif
36
37
/* if too small buffer, then expand it */
38
--
39
2.5.0
40
41
diff view generated by jsdifflib
Deleted patch
1
There should not be a reason for passing a packet size greater than
2
INT_MAX. It's usually a hint of a bug somewhere, so ignore packet sizes
3
greater than INT_MAX in qemu_deliver_packet_iov()
4
1
5
CC: qemu-stable@nongnu.org
6
Reported-by: Daniel Shapira <daniel@twistlock.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
net/net.c | 7 ++++++-
11
1 file changed, 6 insertions(+), 1 deletion(-)
12
13
diff --git a/net/net.c b/net/net.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/net/net.c
16
+++ b/net/net.c
17
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
18
void *opaque)
19
{
20
NetClientState *nc = opaque;
21
+ size_t size = iov_size(iov, iovcnt);
22
int ret;
23
24
+ if (size > INT_MAX) {
25
+ return size;
26
+ }
27
+
28
if (nc->link_down) {
29
- return iov_size(iov, iovcnt);
30
+ return size;
31
}
32
33
if (nc->receive_disabled) {
34
--
35
2.5.0
36
37
diff view generated by jsdifflib
1
From: Martin Wilck <mwilck@suse.com>
1
From: Ding Hui <dinghui@sangfor.com.cn>
2
2
3
The e1000 emulation silently discards RX packets if there's
3
The code setting the RX descriptor status field may have worked fine
4
insufficient space in the ring buffer. This leads to errors
4
previously, however with the update of glibc version, it shows two
5
on higher-level protocols in the guest, with no indication
5
issues when a guest uses dpdk to receive packets:
6
about the error cause.
7
6
8
This patch increments the "Missed Packets Count" (MPC) and
7
1. The dpdk has a certain probability of getting a wrong buffer_addr
9
"Receive No Buffers Count" (RNBC) HW counters in this case.
10
As the emulation has no FIFO for buffering packets that can't
11
immediately be pushed to the guest, these two registers are
12
practically equivalent (see 10.2.7.4, 10.2.7.33 in
13
https://www.intel.com/content/www/us/en/embedded/products/networking/82574l-gbe-controller-datasheet.html).
14
8
15
On a Linux guest, the register content will be reflected in
9
this impact may not be obvious, such as losing a packet once in
16
the "rx_missed_errors" and "rx_no_buffer_count" stats from
10
a while
17
"ethtool -S", and in the "missed" stat from "ip -s -s link show",
18
giving at least some hint about the error cause inside the guest.
19
11
20
If the cause is known, problems like this can often be avoided
12
2. The dpdk may consume a packet twice when scanning the RX desc queue
21
easily, by increasing the number of RX descriptors in the guest
13
over again
22
e1000 driver (e.g under Linux, "e1000.RxDescriptors=1024").
23
14
24
The patch also adds a qemu trace message for this condition.
15
this impact will lead to an infinite wait in Qemu, since the RDT
16
(tail pointer) is unexpectedly increased to equal RDH,
17
which is regarded as the RX desc queue being full
25
18
26
Signed-off-by: Martin Wilck <mwilck@suse.com>
19
Writing a whole RX desc with the DD flag on is not quite correct, because
20
when the underlying implementation of memcpy uses XMM registers to
21
copy e1000_rx_desc (when AVX or something else CPU feature is usable),
22
the byte order of the desc write to memory is indeterminate
23
24
We can use full-scale test case to reproduce the issue-2 by
25
https://github.com/BASM/qemu_dpdk_e1000_test (thanks to Leonid Myravjev)
26
27
I also wrote a POC test case at https://github.com/cdkey/e1000_poc
28
which can reproduce both of them, and easy to verify the patch effect.
29
30
The hw watchpoint also shows that, when Qemu using XMM related instructions
31
writing 16 bytes e1000_rx_desc, concurrent with DPDK using movb
32
writing 1 byte status, the final result of writing to memory will be one
33
of them, if it made by Qemu which DD flag is on, DPDK will consume it
34
again.
35
36
Setting DD status in a separate operation, can prevent the impact of
37
disorder memory writing by memcpy, also avoid unexpected data when
38
concurrent writing status by qemu and guest dpdk.
39
40
Links: https://lore.kernel.org/qemu-devel/20200102110504.GG121208@stefanha-x1.localdomain/T/
41
42
Reported-by: Leonid Myravjev <asm@asm.pp.ru>
43
Cc: Stefan Hajnoczi <stefanha@gmail.com>
44
Cc: Paolo Bonzini <pbonzini@redhat.com>
45
Cc: Michael S. Tsirkin <mst@redhat.com>
46
Cc: qemu-stable@nongnu.org
47
Tested-by: Jing Zhang <zhangjing@sangfor.com.cn>
48
Reviewed-by: Frank Lee <lifan38153@sangfor.com.cn>
49
Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
27
Signed-off-by: Jason Wang <jasowang@redhat.com>
50
Signed-off-by: Jason Wang <jasowang@redhat.com>
28
---
51
---
29
hw/net/e1000.c | 16 +++++++++++++---
52
hw/net/e1000.c | 5 ++++-
30
hw/net/trace-events | 3 +++
53
1 file changed, 4 insertions(+), 1 deletion(-)
31
2 files changed, 16 insertions(+), 3 deletions(-)
32
54
33
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
55
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
34
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
35
--- a/hw/net/e1000.c
57
--- a/hw/net/e1000.c
36
+++ b/hw/net/e1000.c
58
+++ b/hw/net/e1000.c
37
@@ -XXX,XX +XXX,XX @@
38
#include "qemu/range.h"
39
40
#include "e1000x_common.h"
41
+#include "trace.h"
42
43
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
44
45
@@ -XXX,XX +XXX,XX @@ static uint64_t rx_desc_base(E1000State *s)
46
return (bah << 32) + bal;
47
}
48
49
+static void
50
+e1000_receiver_overrun(E1000State *s, size_t size)
51
+{
52
+ trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
53
+ e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
54
+ e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
55
+ set_ics(s, 0, E1000_ICS_RXO);
56
+}
57
+
58
static ssize_t
59
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
60
{
61
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
59
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
62
desc_offset = 0;
60
base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
63
total_size = size + e1000x_fcs_len(s->mac_reg);
61
pci_dma_read(d, base, &desc, sizeof(desc));
64
if (!e1000_has_rxbufs(s, total_size)) {
62
desc.special = vlan_special;
65
- set_ics(s, 0, E1000_ICS_RXO);
63
- desc.status |= (vlan_status | E1000_RXD_STAT_DD);
66
- return -1;
64
+ desc.status &= ~E1000_RXD_STAT_DD;
67
+ e1000_receiver_overrun(s, total_size);
65
if (desc.buffer_addr) {
68
+ return -1;
66
if (desc_offset < size) {
69
}
67
size_t iov_copy;
70
do {
71
desc_size = total_size - desc_offset;
72
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
68
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
73
rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
69
DBGOUT(RX, "Null RX descriptor!!\n");
74
DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
75
rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
76
- set_ics(s, 0, E1000_ICS_RXO);
77
+ e1000_receiver_overrun(s, total_size);
78
return -1;
79
}
70
}
80
} while (desc_offset < total_size);
71
pci_dma_write(d, base, &desc, sizeof(desc));
81
diff --git a/hw/net/trace-events b/hw/net/trace-events
72
+ desc.status |= (vlan_status | E1000_RXD_STAT_DD);
82
index XXXXXXX..XXXXXXX 100644
73
+ pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status),
83
--- a/hw/net/trace-events
74
+ &desc.status, sizeof(desc.status));
84
+++ b/hw/net/trace-events
75
85
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash"
76
if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
86
net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X"
77
s->mac_reg[RDH] = 0;
87
net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes"
88
89
+# hw/net/e1000.c
90
+e1000_receiver_overrun(size_t s, uint32_t rdh, uint32_t rdt) "Receiver overrun: dropped packet of %lu bytes, RDH=%u, RDT=%u"
91
+
92
# hw/net/e1000x_common.c
93
e1000x_rx_can_recv_disabled(bool link_up, bool rx_enabled, bool pci_master) "link_up: %d, rx_enabled %d, pci_master %d"
94
e1000x_vlan_is_vlan_pkt(bool is_vlan_pkt, uint16_t eth_proto, uint16_t vet) "Is VLAN packet: %d, ETH proto: 0x%X, VET: 0x%X"
95
--
78
--
96
2.5.0
79
2.7.4
97
98
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Haochen Tong <i@hexchain.org>
2
2
3
When using the "-device" option, the property is called "mac".
3
bpf_program__set_<TYPE> functions have been deprecated since libbpf 0.8.
4
"macaddr" is only used for the legacy "-net nic" option.
4
Replace with the equivalent bpf_program__set_type call to avoid a
5
deprecation warning.
5
6
6
Reported-by: Harald Hoyer <harald@redhat.com>
7
Signed-off-by: Haochen Tong <i@hexchain.org>
7
Reviewed-by: Markus Armbruster <armbru@redhat.com>
8
Reviewed-by: Zhang Chen <chen.zhang@intel.com>
8
Signed-off-by: Thomas Huth <thuth@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
10
---
11
qemu-options.hx | 2 +-
11
ebpf/ebpf_rss.c | 2 +-
12
1 file changed, 1 insertion(+), 1 deletion(-)
12
1 file changed, 1 insertion(+), 1 deletion(-)
13
13
14
diff --git a/qemu-options.hx b/qemu-options.hx
14
diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/qemu-options.hx
16
--- a/ebpf/ebpf_rss.c
17
+++ b/qemu-options.hx
17
+++ b/ebpf/ebpf_rss.c
18
@@ -XXX,XX +XXX,XX @@ qemu-system-i386 linux.img \
18
@@ -XXX,XX +XXX,XX @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
19
-netdev socket,id=n2,mcast=230.0.0.1:1234
19
goto error;
20
# launch yet another QEMU instance on same "bus"
20
}
21
qemu-system-i386 linux.img \
21
22
- -device e1000,netdev=n3,macaddr=52:54:00:12:34:58 \
22
- bpf_program__set_socket_filter(rss_bpf_ctx->progs.tun_rss_steering_prog);
23
+ -device e1000,netdev=n3,mac=52:54:00:12:34:58 \
23
+ bpf_program__set_type(rss_bpf_ctx->progs.tun_rss_steering_prog, BPF_PROG_TYPE_SOCKET_FILTER);
24
-netdev socket,id=n3,mcast=230.0.0.1:1234
24
25
@end example
25
if (rss_bpf__load(rss_bpf_ctx)) {
26
26
trace_ebpf_error("eBPF RSS", "can not load RSS program");
27
--
27
--
28
2.5.0
28
2.7.4
29
30
diff view generated by jsdifflib