1
The following changes since commit 6632f6ff96f0537fc34cdc00c760656fc62e23c5:
1
The following changes since commit a73549f99612f758dec0fdea6ae1c30b6c709a0b:
2
2
3
Merge remote-tracking branch 'remotes/famz/tags/block-and-testing-pull-request' into staging (2017-07-17 11:46:36 +0100)
3
Merge remote-tracking branch 'remotes/kraxel/tags/ui-20181012-pull-request' into staging (2018-10-12 16:45:51 +0100)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to 189ae6bb5ce1f5a322f8691d00fe942ba43dd601:
9
for you to fetch changes up to a7ec0077c2db445d6bae421963188367d2695bd6:
10
10
11
virtio-net: fix offload ctrl endian (2017-07-17 20:13:56 +0800)
11
qemu-options: Fix bad "macaddr" property in the documentation (2018-10-15 16:14:15 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
- fix virtio-net ctrl offload endian
15
----------------------------------------------------------------
16
- vnet header support for various COLO netfilters and compare thread
16
Jason Wang (4):
17
ne2000: fix possible out of bound access in ne2000_receive
18
rtl8139: fix possible out of bound access
19
pcnet: fix possible buffer overflow
20
net: ignore packet size greater than INT_MAX
17
21
18
----------------------------------------------------------------
22
Martin Wilck (1):
19
Jason Wang (1):
23
e1000: indicate dropped packets in HW counters
20
virtio-net: fix offload ctrl endian
21
24
22
Michal Privoznik (1):
25
Thomas Huth (1):
23
virtion-net: Prefer is_power_of_2()
26
qemu-options: Fix bad "macaddr" property in the documentation
24
27
25
Zhang Chen (12):
28
Zhang Chen (15):
26
net: Add vnet_hdr_len arguments in NetClientState
29
filter-rewriter: Add TCP state machine and fix memory leak in connection_track_table
27
net/net.c: Add vnet_hdr support in SocketReadState
30
colo-compare: implement the process of checkpoint
28
net/filter-mirror.c: Introduce parameter for filter_send()
31
colo-compare: use notifier to notify packets comparing result
29
net/filter-mirror.c: Make filter mirror support vnet support.
32
COLO: integrate colo compare with colo frame
30
net/filter-mirror.c: Add new option to enable vnet support for filter-redirector
33
COLO: Add block replication into colo process
31
net/colo.c: Make vnet_hdr_len as packet property
34
COLO: Remove colo_state migration struct
32
net/colo-compare.c: Introduce parameter for compare_chr_send()
35
COLO: Load dirty pages into SVM's RAM cache firstly
33
net/colo-compare.c: Make colo-compare support vnet_hdr_len
36
ram/COLO: Record the dirty pages that SVM received
34
net/colo.c: Add vnet packet parse feature in colo-proxy
37
COLO: Flush memory data from ram cache
35
net/colo-compare.c: Add vnet packet's tcp/udp/icmp compare
38
qapi/migration.json: Rename COLO unknown mode to none mode.
36
net/filter-rewriter.c: Make filter-rewriter support vnet_hdr_len
39
qapi: Add new command to query colo status
37
docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header
40
savevm: split the process of different stages for loadvm/savevm
41
filter: Add handle_event method for NetFilterClass
42
filter-rewriter: handle checkpoint and failover event
43
docs: Add COLO status diagram to COLO-FT.txt
38
44
39
docs/colo-proxy.txt | 26 ++++++++++++++++
45
liujunjie (1):
40
hw/net/virtio-net.c | 4 ++-
46
clean up callback when del virtqueue
41
include/net/net.h | 10 ++++--
47
42
net/colo-compare.c | 84 ++++++++++++++++++++++++++++++++++++++++++---------
48
zhanghailiang (4):
43
net/colo.c | 9 +++---
49
qmp event: Add COLO_EXIT event to notify users while exited COLO
44
net/colo.h | 4 ++-
50
COLO: flush host dirty ram from cache
45
net/filter-mirror.c | 75 +++++++++++++++++++++++++++++++++++++++++----
51
COLO: notify net filters about checkpoint/failover event
46
net/filter-rewriter.c | 37 ++++++++++++++++++++++-
52
COLO: quick failover process by kick COLO thread
47
net/net.c | 37 ++++++++++++++++++++---
53
48
net/socket.c | 8 ++---
54
docs/COLO-FT.txt | 34 ++++++++
49
qemu-options.hx | 19 ++++++------
55
hw/net/e1000.c | 16 +++-
50
11 files changed, 265 insertions(+), 48 deletions(-)
56
hw/net/ne2000.c | 4 +-
57
hw/net/pcnet.c | 4 +-
58
hw/net/rtl8139.c | 8 +-
59
hw/net/trace-events | 3 +
60
hw/virtio/virtio.c | 2 +
61
include/exec/ram_addr.h | 1 +
62
include/migration/colo.h | 11 ++-
63
include/net/filter.h | 5 ++
64
migration/Makefile.objs | 2 +-
65
migration/colo-comm.c | 76 -----------------
66
migration/colo-failover.c | 2 +-
67
migration/colo.c | 212 +++++++++++++++++++++++++++++++++++++++++++---
68
migration/migration.c | 46 ++++++++--
69
migration/ram.c | 166 +++++++++++++++++++++++++++++++++++-
70
migration/ram.h | 4 +
71
migration/savevm.c | 53 ++++++++++--
72
migration/savevm.h | 5 ++
73
migration/trace-events | 3 +
74
net/colo-compare.c | 115 ++++++++++++++++++++++---
75
net/colo-compare.h | 24 ++++++
76
net/colo.c | 10 ++-
77
net/colo.h | 11 +--
78
net/filter-rewriter.c | 166 +++++++++++++++++++++++++++++++++---
79
net/filter.c | 17 ++++
80
net/net.c | 26 +++++-
81
qapi/migration.json | 80 +++++++++++++++--
82
qemu-options.hx | 2 +-
83
vl.c | 2 -
84
30 files changed, 958 insertions(+), 152 deletions(-)
85
delete mode 100644 migration/colo-comm.c
86
create mode 100644 net/colo-compare.h
51
87
52
88
89
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Zhang Chen <zhangckid@gmail.com>
2
2
3
Make colo-compare and filter-rewriter able to parse vnet packets.
3
We add an almost full TCP state machine in filter-rewriter, except
4
4
TCPS_LISTEN and some simplifications in the VM active close FIN states.
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
5
The reason for this simplification is that the guest kernel will track
6
the TCP status and wait 2MSL time too; if the client resends the FIN packet,
7
the guest will resend the last ACK, so we needn't wait 2MSL time in filter-rewriter.
8
9
After a net connection is closed, we didn't clear its related resources
10
in connection_track_table, which leads to a memory leak.
11
12
Let's track the state of net connection, if it is closed, its related
13
resources will be cleared up.
14
15
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
16
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
17
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
19
---
8
net/colo.c | 6 +++---
20
net/colo.c | 2 +-
9
1 file changed, 3 insertions(+), 3 deletions(-)
21
net/colo.h | 9 ++---
22
net/filter-rewriter.c | 109 +++++++++++++++++++++++++++++++++++++++++++++-----
23
3 files changed, 104 insertions(+), 16 deletions(-)
10
24
11
diff --git a/net/colo.c b/net/colo.c
25
diff --git a/net/colo.c b/net/colo.c
12
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo.c
27
--- a/net/colo.c
14
+++ b/net/colo.c
28
+++ b/net/colo.c
15
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
29
@@ -XXX,XX +XXX,XX @@ Connection *connection_new(ConnectionKey *key)
30
conn->ip_proto = key->ip_proto;
31
conn->processing = false;
32
conn->offset = 0;
33
- conn->syn_flag = 0;
34
+ conn->tcp_state = TCPS_CLOSED;
35
conn->pack = 0;
36
conn->sack = 0;
37
g_queue_init(&conn->primary_list);
38
diff --git a/net/colo.h b/net/colo.h
39
index XXXXXXX..XXXXXXX 100644
40
--- a/net/colo.h
41
+++ b/net/colo.h
42
@@ -XXX,XX +XXX,XX @@
43
#include "slirp/slirp.h"
44
#include "qemu/jhash.h"
45
#include "qemu/timer.h"
46
+#include "slirp/tcp.h"
47
48
#define HASHTABLE_MAX_SIZE 16384
49
50
@@ -XXX,XX +XXX,XX @@ typedef struct Connection {
51
uint32_t sack;
52
/* offset = secondary_seq - primary_seq */
53
tcp_seq offset;
54
- /*
55
- * we use this flag update offset func
56
- * run once in independent tcp connection
57
- */
58
- int syn_flag;
59
+
60
+ int tcp_state; /* TCP FSM state */
61
+ tcp_seq fin_ack_seq; /* the seq of 'fin=1,ack=1' */
62
} Connection;
63
64
uint32_t connection_key_hash(const void *opaque);
65
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/net/filter-rewriter.c
68
+++ b/net/filter-rewriter.c
69
@@ -XXX,XX +XXX,XX @@ static int is_tcp_packet(Packet *pkt)
70
}
71
72
/* handle tcp packet from primary guest */
73
-static int handle_primary_tcp_pkt(NetFilterState *nf,
74
+static int handle_primary_tcp_pkt(RewriterState *rf,
75
Connection *conn,
76
- Packet *pkt)
77
+ Packet *pkt, ConnectionKey *key)
16
{
78
{
17
int network_length;
79
struct tcphdr *tcp_pkt;
18
static const uint8_t vlan[] = {0x81, 0x00};
80
19
- uint8_t *data = pkt->data;
81
@@ -XXX,XX +XXX,XX @@ static int handle_primary_tcp_pkt(NetFilterState *nf,
20
+ uint8_t *data = pkt->data + pkt->vnet_hdr_len;
82
trace_colo_filter_rewriter_conn_offset(conn->offset);
21
uint16_t l3_proto;
83
}
22
ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
84
23
85
+ if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) &&
24
- if (pkt->size < ETH_HLEN) {
86
+ conn->tcp_state == TCPS_SYN_SENT) {
25
+ if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
87
+ conn->tcp_state = TCPS_ESTABLISHED;
26
trace_colo_proxy_main("pkt->size < ETH_HLEN");
88
+ }
27
return 1;
89
+
28
}
90
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
29
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
91
/*
30
}
92
* we use this flag update offset func
31
93
* run once in independent tcp connection
32
network_length = pkt->ip->ip_hl * 4;
94
*/
33
- if (pkt->size < l2hdr_len + network_length) {
95
- conn->syn_flag = 1;
34
+ if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) {
96
+ conn->tcp_state = TCPS_SYN_RECEIVED;
35
trace_colo_proxy_main("pkt->size < network_header + network_length");
97
}
36
return 1;
98
37
}
99
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
100
- if (conn->syn_flag) {
101
+ if (conn->tcp_state == TCPS_SYN_RECEIVED) {
102
/*
103
* offset = secondary_seq - primary seq
104
* ack packet sent by guest from primary node,
105
* so we use th_ack - 1 get primary_seq
106
*/
107
conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
108
- conn->syn_flag = 0;
109
+ conn->tcp_state = TCPS_ESTABLISHED;
110
}
111
if (conn->offset) {
112
/* handle packets to the secondary from the primary */
113
@@ -XXX,XX +XXX,XX @@ static int handle_primary_tcp_pkt(NetFilterState *nf,
114
net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
115
pkt->size - pkt->vnet_hdr_len);
116
}
117
+
118
+ /*
119
+ * Passive close step 3
120
+ */
121
+ if ((conn->tcp_state == TCPS_LAST_ACK) &&
122
+ (ntohl(tcp_pkt->th_ack) == (conn->fin_ack_seq + 1))) {
123
+ conn->tcp_state = TCPS_CLOSED;
124
+ g_hash_table_remove(rf->connection_track_table, key);
125
+ }
126
+ }
127
+
128
+ if ((tcp_pkt->th_flags & TH_FIN) == TH_FIN) {
129
+ /*
130
+ * Passive close.
131
+ * Step 1:
132
+ * The *server* side of this connect is VM, *client* tries to close
133
+ * the connection. We will into CLOSE_WAIT status.
134
+ *
135
+ * Step 2:
136
+ * In this step we will into LAST_ACK status.
137
+ *
138
+ * We got 'fin=1, ack=1' packet from server side, we need to
139
+ * record the seq of 'fin=1, ack=1' packet.
140
+ *
141
+ * Step 3:
142
+ * We got 'ack=1' packets from client side, it acks 'fin=1, ack=1'
143
+ * packet from server side. From this point, we can ensure that there
144
+ * will be no packets in the connection, except that, some errors
145
+ * happen between the path of 'filter object' and vNIC, if this rare
146
+ * case really happen, we can still create a new connection,
147
+ * So it is safe to remove the connection from connection_track_table.
148
+ *
149
+ */
150
+ if (conn->tcp_state == TCPS_ESTABLISHED) {
151
+ conn->tcp_state = TCPS_CLOSE_WAIT;
152
+ }
153
+
154
+ /*
155
+ * Active close step 2.
156
+ */
157
+ if (conn->tcp_state == TCPS_FIN_WAIT_1) {
158
+ conn->tcp_state = TCPS_TIME_WAIT;
159
+ /*
160
+ * For simplify implementation, we needn't wait 2MSL time
161
+ * in filter rewriter. Because guest kernel will track the
162
+ * TCP status and wait 2MSL time, if client resend the FIN
163
+ * packet, guest will apply the last ACK too.
164
+ */
165
+ conn->tcp_state = TCPS_CLOSED;
166
+ g_hash_table_remove(rf->connection_track_table, key);
167
+ }
168
}
169
170
return 0;
171
}
172
173
/* handle tcp packet from secondary guest */
174
-static int handle_secondary_tcp_pkt(NetFilterState *nf,
175
+static int handle_secondary_tcp_pkt(RewriterState *rf,
176
Connection *conn,
177
- Packet *pkt)
178
+ Packet *pkt, ConnectionKey *key)
179
{
180
struct tcphdr *tcp_pkt;
181
182
@@ -XXX,XX +XXX,XX @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
183
trace_colo_filter_rewriter_conn_offset(conn->offset);
184
}
185
186
- if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
187
+ if (conn->tcp_state == TCPS_SYN_RECEIVED &&
188
+ ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
189
/*
190
* save offset = secondary_seq and then
191
* in handle_primary_tcp_pkt make offset
192
@@ -XXX,XX +XXX,XX @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
193
conn->offset = ntohl(tcp_pkt->th_seq);
194
}
195
196
+ /* VM active connect */
197
+ if (conn->tcp_state == TCPS_CLOSED &&
198
+ ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
199
+ conn->tcp_state = TCPS_SYN_SENT;
200
+ }
201
+
202
if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
203
/* Only need to adjust seq while offset is Non-zero */
204
if (conn->offset) {
205
@@ -XXX,XX +XXX,XX @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
206
}
207
}
208
209
+ /*
210
+ * Passive close step 2:
211
+ */
212
+ if (conn->tcp_state == TCPS_CLOSE_WAIT &&
213
+ (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) {
214
+ conn->fin_ack_seq = ntohl(tcp_pkt->th_seq);
215
+ conn->tcp_state = TCPS_LAST_ACK;
216
+ }
217
+
218
+ /*
219
+ * Active close
220
+ *
221
+ * Step 1:
222
+ * The *server* side of this connect is VM, *server* tries to close
223
+ * the connection.
224
+ *
225
+ * Step 2:
226
+ * We will into CLOSE_WAIT status.
227
+ * We simplify the TCPS_FIN_WAIT_2, TCPS_TIME_WAIT and
228
+ * CLOSING status.
229
+ */
230
+ if (conn->tcp_state == TCPS_ESTABLISHED &&
231
+ (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == TH_FIN) {
232
+ conn->tcp_state = TCPS_FIN_WAIT_1;
233
+ }
234
+
235
return 0;
236
}
237
238
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
239
240
if (sender == nf->netdev) {
241
/* NET_FILTER_DIRECTION_TX */
242
- if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
243
+ if (!handle_primary_tcp_pkt(s, conn, pkt, &key)) {
244
qemu_net_queue_send(s->incoming_queue, sender, 0,
245
(const uint8_t *)pkt->data, pkt->size, NULL);
246
packet_destroy(pkt, NULL);
247
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
248
}
249
} else {
250
/* NET_FILTER_DIRECTION_RX */
251
- if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
252
+ if (!handle_secondary_tcp_pkt(s, conn, pkt, &key)) {
253
qemu_net_queue_send(s->incoming_queue, sender, 0,
254
(const uint8_t *)pkt->data, pkt->size, NULL);
255
packet_destroy(pkt, NULL);
38
--
256
--
39
2.7.4
257
2.5.0
40
258
41
259
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Zhang Chen <zhangckid@gmail.com>
2
2
3
COLO-Proxy only focuses on the packet payload, so we skip the vnet header.
3
While doing a checkpoint, we need to flush all the unhandled packets.
4
4
By using the filter notifier mechanism, we can easily notify
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
5
every compare object to do this process, which runs inside
6
of compare threads as a coroutine.
7
8
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
9
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
10
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
12
---
8
net/colo-compare.c | 8 ++++++--
13
include/migration/colo.h | 6 ++++
9
1 file changed, 6 insertions(+), 2 deletions(-)
14
net/colo-compare.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++
10
15
net/colo-compare.h | 22 ++++++++++++++
16
3 files changed, 106 insertions(+)
17
create mode 100644 net/colo-compare.h
18
19
diff --git a/include/migration/colo.h b/include/migration/colo.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/include/migration/colo.h
22
+++ b/include/migration/colo.h
23
@@ -XXX,XX +XXX,XX @@
24
#include "qemu-common.h"
25
#include "qapi/qapi-types-migration.h"
26
27
+enum colo_event {
28
+ COLO_EVENT_NONE,
29
+ COLO_EVENT_CHECKPOINT,
30
+ COLO_EVENT_FAILOVER,
31
+};
32
+
33
void colo_info_init(void);
34
35
void migrate_start_colo_process(MigrationState *s);
11
diff --git a/net/colo-compare.c b/net/colo-compare.c
36
diff --git a/net/colo-compare.c b/net/colo-compare.c
12
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo-compare.c
38
--- a/net/colo-compare.c
14
+++ b/net/colo-compare.c
39
+++ b/net/colo-compare.c
15
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt, Packet *spkt, int offset)
40
@@ -XXX,XX +XXX,XX @@
16
sec_ip_src, sec_ip_dst);
41
#include "qemu/sockets.h"
42
#include "colo.h"
43
#include "sysemu/iothread.h"
44
+#include "net/colo-compare.h"
45
+#include "migration/colo.h"
46
47
#define TYPE_COLO_COMPARE "colo-compare"
48
#define COLO_COMPARE(obj) \
49
OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
50
51
+static QTAILQ_HEAD(, CompareState) net_compares =
52
+ QTAILQ_HEAD_INITIALIZER(net_compares);
53
+
54
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
55
#define MAX_QUEUE_SIZE 1024
56
57
@@ -XXX,XX +XXX,XX @@
58
/* TODO: Should be configurable */
59
#define REGULAR_PACKET_CHECK_MS 3000
60
61
+static QemuMutex event_mtx;
62
+static QemuCond event_complete_cond;
63
+static int event_unhandled_count;
64
+
65
/*
66
* + CompareState ++
67
* | |
68
@@ -XXX,XX +XXX,XX @@ typedef struct CompareState {
69
IOThread *iothread;
70
GMainContext *worker_context;
71
QEMUTimer *packet_check_timer;
72
+
73
+ QEMUBH *event_bh;
74
+ enum colo_event event;
75
+
76
+ QTAILQ_ENTRY(CompareState) next;
77
} CompareState;
78
79
typedef struct CompareClass {
80
@@ -XXX,XX +XXX,XX @@ static void check_old_packet_regular(void *opaque)
81
REGULAR_PACKET_CHECK_MS);
82
}
83
84
+/* Public API, Used for COLO frame to notify compare event */
85
+void colo_notify_compares_event(void *opaque, int event, Error **errp)
86
+{
87
+ CompareState *s;
88
+
89
+ qemu_mutex_lock(&event_mtx);
90
+ QTAILQ_FOREACH(s, &net_compares, next) {
91
+ s->event = event;
92
+ qemu_bh_schedule(s->event_bh);
93
+ event_unhandled_count++;
94
+ }
95
+ /* Wait all compare threads to finish handling this event */
96
+ while (event_unhandled_count > 0) {
97
+ qemu_cond_wait(&event_complete_cond, &event_mtx);
98
+ }
99
+
100
+ qemu_mutex_unlock(&event_mtx);
101
+}
102
+
103
static void colo_compare_timer_init(CompareState *s)
104
{
105
AioContext *ctx = iothread_get_aio_context(s->iothread);
106
@@ -XXX,XX +XXX,XX @@ static void colo_compare_timer_del(CompareState *s)
17
}
107
}
18
108
}
19
+ offset = ppkt->vnet_hdr_len + offset;
109
20
+
110
+static void colo_flush_packets(void *opaque, void *user_data);
21
if (ppkt->size == spkt->size) {
111
+
22
- return memcmp(ppkt->data + offset, spkt->data + offset,
112
+static void colo_compare_handle_event(void *opaque)
23
+ return memcmp(ppkt->data + offset,
113
+{
24
+ spkt->data + offset,
114
+ CompareState *s = opaque;
25
spkt->size - offset);
115
+
26
} else {
116
+ switch (s->event) {
27
trace_colo_compare_main("Net packet size are not the same");
117
+ case COLO_EVENT_CHECKPOINT:
28
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
118
+ g_queue_foreach(&s->conn_list, colo_flush_packets, s);
29
*/
119
+ break;
30
if (ptcp->th_off > 5) {
120
+ case COLO_EVENT_FAILOVER:
31
ptrdiff_t tcp_offset;
121
+ break;
32
+
122
+ default:
33
tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
123
+ break;
34
- + (ptcp->th_off * 4);
124
+ }
35
+ + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
125
+
36
res = colo_packet_compare_common(ppkt, spkt, tcp_offset);
126
+ assert(event_unhandled_count > 0);
37
} else if (ptcp->th_sum == stcp->th_sum) {
127
+
38
res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN);
128
+ qemu_mutex_lock(&event_mtx);
129
+ event_unhandled_count--;
130
+ qemu_cond_broadcast(&event_complete_cond);
131
+ qemu_mutex_unlock(&event_mtx);
132
+}
133
+
134
static void colo_compare_iothread(CompareState *s)
135
{
136
object_ref(OBJECT(s->iothread));
137
@@ -XXX,XX +XXX,XX @@ static void colo_compare_iothread(CompareState *s)
138
s, s->worker_context, true);
139
140
colo_compare_timer_init(s);
141
+ s->event_bh = qemu_bh_new(colo_compare_handle_event, s);
142
}
143
144
static char *compare_get_pri_indev(Object *obj, Error **errp)
145
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
146
net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
147
net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
148
149
+ QTAILQ_INSERT_TAIL(&net_compares, s, next);
150
+
151
g_queue_init(&s->conn_list);
152
153
+ qemu_mutex_init(&event_mtx);
154
+ qemu_cond_init(&event_complete_cond);
155
+
156
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
157
connection_key_equal,
158
g_free,
159
@@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj)
160
static void colo_compare_finalize(Object *obj)
161
{
162
CompareState *s = COLO_COMPARE(obj);
163
+ CompareState *tmp = NULL;
164
165
qemu_chr_fe_deinit(&s->chr_pri_in, false);
166
qemu_chr_fe_deinit(&s->chr_sec_in, false);
167
@@ -XXX,XX +XXX,XX @@ static void colo_compare_finalize(Object *obj)
168
if (s->iothread) {
169
colo_compare_timer_del(s);
170
}
171
+
172
+ qemu_bh_delete(s->event_bh);
173
+
174
+ QTAILQ_FOREACH(tmp, &net_compares, next) {
175
+ if (tmp == s) {
176
+ QTAILQ_REMOVE(&net_compares, s, next);
177
+ break;
178
+ }
179
+ }
180
+
181
/* Release all unhandled packets after compare thead exited */
182
g_queue_foreach(&s->conn_list, colo_flush_packets, s);
183
184
@@ -XXX,XX +XXX,XX @@ static void colo_compare_finalize(Object *obj)
185
if (s->iothread) {
186
object_unref(OBJECT(s->iothread));
187
}
188
+
189
+ qemu_mutex_destroy(&event_mtx);
190
+ qemu_cond_destroy(&event_complete_cond);
191
+
192
g_free(s->pri_indev);
193
g_free(s->sec_indev);
194
g_free(s->outdev);
195
diff --git a/net/colo-compare.h b/net/colo-compare.h
196
new file mode 100644
197
index XXXXXXX..XXXXXXX
198
--- /dev/null
199
+++ b/net/colo-compare.h
200
@@ -XXX,XX +XXX,XX @@
201
+/*
202
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
203
+ * (a.k.a. Fault Tolerance or Continuous Replication)
204
+ *
205
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
206
+ * Copyright (c) 2017 FUJITSU LIMITED
207
+ * Copyright (c) 2017 Intel Corporation
208
+ *
209
+ * Authors:
210
+ * zhanghailiang <zhang.zhanghailiang@huawei.com>
211
+ * Zhang Chen <zhangckid@gmail.com>
212
+ *
213
+ * This work is licensed under the terms of the GNU GPL, version 2 or
214
+ * later. See the COPYING file in the top-level directory.
215
+ */
216
+
217
+#ifndef QEMU_COLO_COMPARE_H
218
+#define QEMU_COLO_COMPARE_H
219
+
220
+void colo_notify_compares_event(void *opaque, int event, Error **errp);
221
+
222
+#endif /* QEMU_COLO_COMPARE_H */
39
--
223
--
40
2.7.4
224
2.5.0
41
225
42
226
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Zhang Chen <zhangckid@gmail.com>
2
2
3
This patch changes the compare_chr_send() parameter from CharBackend to CompareState,
3
It's a good idea to use notifier to notify COLO frame of
4
so we can get more information like vnet_hdr (we use it to support packets with vnet_header).
4
inconsistent packets comparing.
5
5
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
7
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
10
---
9
net/colo-compare.c | 14 +++++++-------
11
net/colo-compare.c | 37 ++++++++++++++++++++++++++-----------
10
1 file changed, 7 insertions(+), 7 deletions(-)
12
net/colo-compare.h | 2 ++
13
2 files changed, 28 insertions(+), 11 deletions(-)
11
14
12
diff --git a/net/colo-compare.c b/net/colo-compare.c
15
diff --git a/net/colo-compare.c b/net/colo-compare.c
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/net/colo-compare.c
17
--- a/net/colo-compare.c
15
+++ b/net/colo-compare.c
18
+++ b/net/colo-compare.c
16
@@ -XXX,XX +XXX,XX @@ enum {
19
@@ -XXX,XX +XXX,XX @@
17
SECONDARY_IN,
20
#include "sysemu/iothread.h"
18
};
21
#include "net/colo-compare.h"
19
22
#include "migration/colo.h"
20
-static int compare_chr_send(CharBackend *out,
23
+#include "migration/migration.h"
21
+static int compare_chr_send(CompareState *s,
24
22
const uint8_t *buf,
25
#define TYPE_COLO_COMPARE "colo-compare"
23
uint32_t size);
26
#define COLO_COMPARE(obj) \
24
27
@@ -XXX,XX +XXX,XX @@
25
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
28
static QTAILQ_HEAD(, CompareState) net_compares =
26
}
29
QTAILQ_HEAD_INITIALIZER(net_compares);
27
30
28
if (result) {
31
+static NotifierList colo_compare_notifiers =
29
- ret = compare_chr_send(&s->chr_out, pkt->data, pkt->size);
32
+ NOTIFIER_LIST_INITIALIZER(colo_compare_notifiers);
30
+ ret = compare_chr_send(s, pkt->data, pkt->size);
33
+
31
if (ret < 0) {
34
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
32
error_report("colo_send_primary_packet failed");
35
#define MAX_QUEUE_SIZE 1024
33
}
36
34
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
37
@@ -XXX,XX +XXX,XX @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
38
return false;
39
}
40
41
+static void colo_compare_inconsistency_notify(void)
42
+{
43
+ notifier_list_notify(&colo_compare_notifiers,
44
+ migrate_get_current());
45
+}
46
+
47
static void colo_compare_tcp(CompareState *s, Connection *conn)
48
{
49
Packet *ppkt = NULL, *spkt = NULL;
50
@@ -XXX,XX +XXX,XX @@ sec:
51
qemu_hexdump((char *)spkt->data, stderr,
52
"colo-compare spkt", spkt->size);
53
54
- /*
55
- * colo_compare_inconsistent_notify();
56
- * TODO: notice to checkpoint();
57
- */
58
+ colo_compare_inconsistency_notify();
35
}
59
}
36
}
60
}
37
61
38
-static int compare_chr_send(CharBackend *out,
62
@@ -XXX,XX +XXX,XX @@ static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
39
+static int compare_chr_send(CompareState *s,
63
}
40
const uint8_t *buf,
64
}
41
uint32_t size)
65
66
+void colo_compare_register_notifier(Notifier *notify)
67
+{
68
+ notifier_list_add(&colo_compare_notifiers, notify);
69
+}
70
+
71
+void colo_compare_unregister_notifier(Notifier *notify)
72
+{
73
+ notifier_remove(notify);
74
+}
75
+
76
static int colo_old_packet_check_one_conn(Connection *conn,
77
- void *user_data)
78
+ void *user_data)
42
{
79
{
43
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CharBackend *out,
80
GList *result = NULL;
81
int64_t check_time = REGULAR_PACKET_CHECK_MS;
82
@@ -XXX,XX +XXX,XX @@ static int colo_old_packet_check_one_conn(Connection *conn,
83
84
if (result) {
85
/* Do checkpoint will flush old packet */
86
- /*
87
- * TODO: Notify colo frame to do checkpoint.
88
- * colo_compare_inconsistent_notify();
89
- */
90
+ colo_compare_inconsistency_notify();
44
return 0;
91
return 0;
45
}
92
}
46
93
47
- ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len));
94
@@ -XXX,XX +XXX,XX @@ static void colo_compare_packet(CompareState *s, Connection *conn,
48
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
95
/*
49
if (ret != sizeof(len)) {
96
* If one packet arrive late, the secondary_list or
50
goto err;
97
* primary_list will be empty, so we can't compare it
98
- * until next comparison.
99
+ * until next comparison. If the packets in the list are
100
+ * timeout, it will trigger a checkpoint request.
101
*/
102
trace_colo_compare_main("packet different");
103
g_queue_push_head(&conn->primary_list, pkt);
104
- /* TODO: colo_notify_checkpoint();*/
105
+ colo_compare_inconsistency_notify();
106
break;
107
}
51
}
108
}
52
109
diff --git a/net/colo-compare.h b/net/colo-compare.h
53
- ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size);
110
index XXXXXXX..XXXXXXX 100644
54
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
111
--- a/net/colo-compare.h
55
if (ret != size) {
112
+++ b/net/colo-compare.h
56
goto err;
113
@@ -XXX,XX +XXX,XX @@
57
}
114
#define QEMU_COLO_COMPARE_H
58
@@ -XXX,XX +XXX,XX @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs)
115
59
116
void colo_notify_compares_event(void *opaque, int event, Error **errp);
60
if (packet_enqueue(s, PRIMARY_IN)) {
117
+void colo_compare_register_notifier(Notifier *notify);
61
trace_colo_compare_main("primary: unsupported packet in");
118
+void colo_compare_unregister_notifier(Notifier *notify);
62
- compare_chr_send(&s->chr_out, pri_rs->buf, pri_rs->packet_len);
119
63
+ compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
120
#endif /* QEMU_COLO_COMPARE_H */
64
} else {
65
/* compare connection */
66
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
67
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
68
69
while (!g_queue_is_empty(&conn->primary_list)) {
70
pkt = g_queue_pop_head(&conn->primary_list);
71
- compare_chr_send(&s->chr_out, pkt->data, pkt->size);
72
+ compare_chr_send(s, pkt->data, pkt->size);
73
packet_destroy(pkt, NULL);
74
}
75
while (!g_queue_is_empty(&conn->secondary_list)) {
76
--
121
--
77
2.7.4
122
2.5.0
78
123
79
124
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Zhang Chen <zhangckid@gmail.com>
2
2
3
We add the vnet_hdr_support option for colo-compare, default is disabled.
3
For COLO FT, both the PVM and SVM run at the same time,
4
If you use virtio-net-pci or other driver needs vnet_hdr, please enable it.
4
and only sync the state when needed.
5
You can use it for example:
6
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
7
5
8
COLO-compare can get vnet header length from filter,
6
So here, let the SVM run while not doing a checkpoint, and change
9
Add vnet_hdr_len to struct packet and output packet with
7
DEFAULT_MIGRATE_X_CHECKPOINT_DELAY to 200*100.
10
the vnet_hdr_len.
11
8
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
9
Besides, we forgot to release colo_checkpoint_semd and
10
colo_delay_timer, fix them here.
11
12
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
13
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
14
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
15
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
17
---
15
net/colo-compare.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-------
18
migration/colo.c | 42 ++++++++++++++++++++++++++++++++++++++++--
16
qemu-options.hx | 4 ++--
19
migration/migration.c | 6 ++----
17
2 files changed, 55 insertions(+), 9 deletions(-)
20
2 files changed, 42 insertions(+), 6 deletions(-)
18
21
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
22
diff --git a/migration/colo.c b/migration/colo.c
20
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
24
--- a/migration/colo.c
22
+++ b/net/colo-compare.c
25
+++ b/migration/colo.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct CompareState {
26
@@ -XXX,XX +XXX,XX @@
24
CharBackend chr_out;
27
#include "qemu/error-report.h"
25
SocketReadState pri_rs;
28
#include "migration/failover.h"
26
SocketReadState sec_rs;
29
#include "replication.h"
27
+ bool vnet_hdr;
30
+#include "net/colo-compare.h"
28
31
+#include "net/colo.h"
29
/* connection list: the connections belonged to this NIC could be found
32
30
* in this list.
33
static bool vmstate_loading;
31
@@ -XXX,XX +XXX,XX @@ enum {
34
+static Notifier packets_compare_notifier;
32
35
33
static int compare_chr_send(CompareState *s,
36
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
34
const uint8_t *buf,
37
35
- uint32_t size);
38
@@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s,
36
+ uint32_t size,
39
goto out;
37
+ uint32_t vnet_hdr_len);
38
39
static gint seq_sorter(Packet *a, Packet *b, gpointer data)
40
{
41
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
42
}
43
44
if (result) {
45
- ret = compare_chr_send(s, pkt->data, pkt->size);
46
+ ret = compare_chr_send(s,
47
+ pkt->data,
48
+ pkt->size,
49
+ pkt->vnet_hdr_len);
50
if (ret < 0) {
51
error_report("colo_send_primary_packet failed");
52
}
53
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
54
55
static int compare_chr_send(CompareState *s,
56
const uint8_t *buf,
57
- uint32_t size)
58
+ uint32_t size,
59
+ uint32_t vnet_hdr_len)
60
{
61
int ret = 0;
62
uint32_t len = htonl(size);
63
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CompareState *s,
64
goto err;
65
}
40
}
66
41
67
+ if (s->vnet_hdr) {
42
+ colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
68
+ /*
43
+ if (local_err) {
69
+ * We send vnet header len make other module(like filter-redirector)
44
+ goto out;
70
+ * know how to parse net packet correctly.
71
+ */
72
+ len = htonl(vnet_hdr_len);
73
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
74
+ if (ret != sizeof(len)) {
75
+ goto err;
76
+ }
77
+ }
45
+ }
78
+
46
+
79
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
47
/* Disable block migration */
80
if (ret != size) {
48
migrate_set_block_enabled(false, &local_err);
81
goto err;
49
qemu_savevm_state_header(fb);
82
@@ -XXX,XX +XXX,XX @@ static void compare_set_outdev(Object *obj, const char *value, Error **errp)
50
@@ -XXX,XX +XXX,XX @@ out:
83
s->outdev = g_strdup(value);
51
return ret;
84
}
52
}
85
53
86
+static bool compare_get_vnet_hdr(Object *obj, Error **errp)
54
+static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
87
+{
55
+{
88
+ CompareState *s = COLO_COMPARE(obj);
56
+ colo_checkpoint_notify(data);
89
+
90
+ return s->vnet_hdr;
91
+}
57
+}
92
+
58
+
93
+static void compare_set_vnet_hdr(Object *obj,
59
static void colo_process_checkpoint(MigrationState *s)
94
+ bool value,
60
{
95
+ Error **errp)
61
QIOChannelBuffer *bioc;
96
+{
62
@@ -XXX,XX +XXX,XX @@ static void colo_process_checkpoint(MigrationState *s)
97
+ CompareState *s = COLO_COMPARE(obj);
63
goto out;
64
}
65
66
+ packets_compare_notifier.notify = colo_compare_notify_checkpoint;
67
+ colo_compare_register_notifier(&packets_compare_notifier);
98
+
68
+
99
+ s->vnet_hdr = value;
69
/*
100
+}
70
* Wait for Secondary finish loading VM states and enter COLO
71
* restore.
72
@@ -XXX,XX +XXX,XX @@ out:
73
qemu_fclose(fb);
74
}
75
76
- timer_del(s->colo_delay_timer);
77
-
78
/* Hope this not to be too long to wait here */
79
qemu_sem_wait(&s->colo_exit_sem);
80
qemu_sem_destroy(&s->colo_exit_sem);
101
+
81
+
102
static void compare_pri_rs_finalize(SocketReadState *pri_rs)
82
+ /*
103
{
83
+ * It is safe to unregister notifier after failover finished.
104
CompareState *s = container_of(pri_rs, CompareState, pri_rs);
84
+ * Besides, colo_delay_timer and colo_checkpoint_sem can't be
105
85
+ * released befor unregister notifier, or there will be use-after-free
106
if (packet_enqueue(s, PRIMARY_IN)) {
86
+ * error.
107
trace_colo_compare_main("primary: unsupported packet in");
87
+ */
108
- compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
88
+ colo_compare_unregister_notifier(&packets_compare_notifier);
109
+ compare_chr_send(s,
89
+ timer_del(s->colo_delay_timer);
110
+ pri_rs->buf,
90
+ timer_free(s->colo_delay_timer);
111
+ pri_rs->packet_len,
91
+ qemu_sem_destroy(&s->colo_checkpoint_sem);
112
+ pri_rs->vnet_hdr_len);
113
} else {
114
/* compare connection */
115
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
116
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
117
return;
118
}
119
120
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
121
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
122
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
123
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
124
125
g_queue_init(&s->conn_list);
126
127
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
128
129
while (!g_queue_is_empty(&conn->primary_list)) {
130
pkt = g_queue_pop_head(&conn->primary_list);
131
- compare_chr_send(s, pkt->data, pkt->size);
132
+ compare_chr_send(s,
133
+ pkt->data,
134
+ pkt->size,
135
+ pkt->vnet_hdr_len);
136
packet_destroy(pkt, NULL);
137
}
138
while (!g_queue_is_empty(&conn->secondary_list)) {
139
@@ -XXX,XX +XXX,XX @@ static void colo_compare_class_init(ObjectClass *oc, void *data)
140
141
static void colo_compare_init(Object *obj)
142
{
143
+ CompareState *s = COLO_COMPARE(obj);
144
+
92
+
145
object_property_add_str(obj, "primary_in",
93
/*
146
compare_get_pri_indev, compare_set_pri_indev,
94
* Must be called after failover BH is completed,
147
NULL);
95
* Or the failover BH may shutdown the wrong fd that
148
@@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj)
96
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
149
object_property_add_str(obj, "outdev",
97
fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
150
compare_get_outdev, compare_set_outdev,
98
object_unref(OBJECT(bioc));
151
NULL);
99
100
+ qemu_mutex_lock_iothread();
101
+ vm_start();
102
+ trace_colo_vm_state_change("stop", "run");
103
+ qemu_mutex_unlock_iothread();
152
+
104
+
153
+ s->vnet_hdr = false;
105
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
154
+ object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
106
&local_err);
155
+ compare_set_vnet_hdr, NULL);
107
if (local_err) {
156
}
108
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
157
109
goto out;
158
static void colo_compare_finalize(Object *obj)
110
}
159
diff --git a/qemu-options.hx b/qemu-options.hx
111
112
+ qemu_mutex_lock_iothread();
113
+ vm_stop_force_state(RUN_STATE_COLO);
114
+ trace_colo_vm_state_change("run", "stop");
115
+ qemu_mutex_unlock_iothread();
116
+
117
/* FIXME: This is unnecessary for periodic checkpoint mode */
118
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
119
&local_err);
120
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
121
}
122
123
vmstate_loading = false;
124
+ vm_start();
125
+ trace_colo_vm_state_change("stop", "run");
126
qemu_mutex_unlock_iothread();
127
128
if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
129
diff --git a/migration/migration.c b/migration/migration.c
160
index XXXXXXX..XXXXXXX 100644
130
index XXXXXXX..XXXXXXX 100644
161
--- a/qemu-options.hx
131
--- a/migration/migration.c
162
+++ b/qemu-options.hx
132
+++ b/migration/migration.c
163
@@ -XXX,XX +XXX,XX @@ Dump the network traffic on netdev @var{dev} to the file specified by
133
@@ -XXX,XX +XXX,XX @@
164
The file format is libpcap, so it can be analyzed with tools such as tcpdump
134
/* Migration XBZRLE default cache size */
165
or Wireshark.
135
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)
166
136
167
-@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},
137
-/* The delay time (in ms) between two COLO checkpoints
168
-outdev=@var{chardevid}
138
- * Note: Please change this default value to 10000 when we support hybrid mode.
169
+@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid}[,vnet_hdr_support]
139
- */
170
140
-#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
171
Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with
141
+/* The delay time (in ms) between two COLO checkpoints */
172
secondary packet. If the packets are same, we will output primary
142
+#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
173
packet to outdev@var{chardevid}, else we will notify colo-frame
143
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
174
do checkpoint and send primary packet to outdev@var{chardevid}.
144
#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16
175
+if it has the vnet_hdr_support flag, colo compare will send/recv packet with vnet_hdr_len.
176
177
we must use it with the help of filter-mirror and filter-redirector.
178
145
179
--
146
--
180
2.7.4
147
2.5.0
181
148
182
149
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <zhangckid@gmail.com>
1
2
3
Make sure the master starts block replication after the slave's block
4
replication started.
5
6
Besides, we need to activate the VM's blocks before going into
7
COLO state.
8
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
11
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
12
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
migration/colo.c | 43 +++++++++++++++++++++++++++++++++++++++++++
16
migration/migration.c | 10 ++++++++++
17
2 files changed, 53 insertions(+)
18
19
diff --git a/migration/colo.c b/migration/colo.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/migration/colo.c
22
+++ b/migration/colo.c
23
@@ -XXX,XX +XXX,XX @@
24
#include "replication.h"
25
#include "net/colo-compare.h"
26
#include "net/colo.h"
27
+#include "block/block.h"
28
29
static bool vmstate_loading;
30
static Notifier packets_compare_notifier;
31
@@ -XXX,XX +XXX,XX @@ static void secondary_vm_do_failover(void)
32
{
33
int old_state;
34
MigrationIncomingState *mis = migration_incoming_get_current();
35
+ Error *local_err = NULL;
36
37
/* Can not do failover during the process of VM's loading VMstate, Or
38
* it will break the secondary VM.
39
@@ -XXX,XX +XXX,XX @@ static void secondary_vm_do_failover(void)
40
migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
41
MIGRATION_STATUS_COMPLETED);
42
43
+ replication_stop_all(true, &local_err);
44
+ if (local_err) {
45
+ error_report_err(local_err);
46
+ }
47
+
48
if (!autostart) {
49
error_report("\"-S\" qemu option will be ignored in secondary side");
50
/* recover runstate to normal migration finish state */
51
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
52
{
53
MigrationState *s = migrate_get_current();
54
int old_state;
55
+ Error *local_err = NULL;
56
57
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
58
MIGRATION_STATUS_COMPLETED);
59
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
60
FailoverStatus_str(old_state));
61
return;
62
}
63
+
64
+ replication_stop_all(true, &local_err);
65
+ if (local_err) {
66
+ error_report_err(local_err);
67
+ local_err = NULL;
68
+ }
69
+
70
/* Notify COLO thread that failover work is finished */
71
qemu_sem_post(&s->colo_exit_sem);
72
}
73
@@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s,
74
qemu_savevm_state_header(fb);
75
qemu_savevm_state_setup(fb);
76
qemu_mutex_lock_iothread();
77
+ replication_do_checkpoint_all(&local_err);
78
+ if (local_err) {
79
+ qemu_mutex_unlock_iothread();
80
+ goto out;
81
+ }
82
qemu_savevm_state_complete_precopy(fb, false, false);
83
qemu_mutex_unlock_iothread();
84
85
@@ -XXX,XX +XXX,XX @@ static void colo_process_checkpoint(MigrationState *s)
86
object_unref(OBJECT(bioc));
87
88
qemu_mutex_lock_iothread();
89
+ replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
90
+ if (local_err) {
91
+ qemu_mutex_unlock_iothread();
92
+ goto out;
93
+ }
94
+
95
vm_start();
96
qemu_mutex_unlock_iothread();
97
trace_colo_vm_state_change("stop", "run");
98
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
99
object_unref(OBJECT(bioc));
100
101
qemu_mutex_lock_iothread();
102
+ replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
103
+ if (local_err) {
104
+ qemu_mutex_unlock_iothread();
105
+ goto out;
106
+ }
107
vm_start();
108
trace_colo_vm_state_change("stop", "run");
109
qemu_mutex_unlock_iothread();
110
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
111
goto out;
112
}
113
114
+ replication_get_error_all(&local_err);
115
+ if (local_err) {
116
+ qemu_mutex_unlock_iothread();
117
+ goto out;
118
+ }
119
+ /* discard colo disk buffer */
120
+ replication_do_checkpoint_all(&local_err);
121
+ if (local_err) {
122
+ qemu_mutex_unlock_iothread();
123
+ goto out;
124
+ }
125
+
126
vmstate_loading = false;
127
vm_start();
128
trace_colo_vm_state_change("stop", "run");
129
diff --git a/migration/migration.c b/migration/migration.c
130
index XXXXXXX..XXXXXXX 100644
131
--- a/migration/migration.c
132
+++ b/migration/migration.c
133
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
134
MigrationIncomingState *mis = migration_incoming_get_current();
135
PostcopyState ps;
136
int ret;
137
+ Error *local_err = NULL;
138
139
assert(mis->from_src_file);
140
mis->migration_incoming_co = qemu_coroutine_self();
141
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
142
143
/* we get COLO info, and know if we are in COLO mode */
144
if (!ret && migration_incoming_enable_colo()) {
145
+ /* Make sure all file formats flush their mutable metadata */
146
+ bdrv_invalidate_cache_all(&local_err);
147
+ if (local_err) {
148
+ migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
149
+ MIGRATION_STATUS_FAILED);
150
+ error_report_err(local_err);
151
+ exit(EXIT_FAILURE);
152
+ }
153
+
154
qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
155
colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
156
mis->have_colo_incoming_thread = true;
157
--
158
2.5.0
159
160
diff view generated by jsdifflib
New patch
1
1
From: Zhang Chen <zhangckid@gmail.com>
2
3
We need to know if migration is going into COLO state for
4
incoming side before start normal migration.
5
6
Instead of using the VMStateDescription to send colo_state
7
from source side to destination side, we use MIG_CMD_ENABLE_COLO
8
to indicate whether COLO is enabled or not.
9
10
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
11
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
12
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
13
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
include/migration/colo.h | 5 ++--
17
migration/Makefile.objs | 2 +-
18
migration/colo-comm.c | 76 ------------------------------------------------
19
migration/colo.c | 13 ++++++++-
20
migration/migration.c | 23 ++++++++++++++-
21
migration/savevm.c | 17 +++++++++++
22
migration/savevm.h | 1 +
23
migration/trace-events | 1 +
24
vl.c | 2 --
25
9 files changed, 57 insertions(+), 83 deletions(-)
26
delete mode 100644 migration/colo-comm.c
27
28
diff --git a/include/migration/colo.h b/include/migration/colo.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/include/migration/colo.h
31
+++ b/include/migration/colo.h
32
@@ -XXX,XX +XXX,XX @@ void migrate_start_colo_process(MigrationState *s);
33
bool migration_in_colo_state(void);
34
35
/* loadvm */
36
-bool migration_incoming_enable_colo(void);
37
-void migration_incoming_exit_colo(void);
38
+void migration_incoming_enable_colo(void);
39
+void migration_incoming_disable_colo(void);
40
+bool migration_incoming_colo_enabled(void);
41
void *colo_process_incoming_thread(void *opaque);
42
bool migration_incoming_in_colo_state(void);
43
44
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
45
index XXXXXXX..XXXXXXX 100644
46
--- a/migration/Makefile.objs
47
+++ b/migration/Makefile.objs
48
@@ -XXX,XX +XXX,XX @@
49
common-obj-y += migration.o socket.o fd.o exec.o
50
common-obj-y += tls.o channel.o savevm.o
51
-common-obj-y += colo-comm.o colo.o colo-failover.o
52
+common-obj-y += colo.o colo-failover.o
53
common-obj-y += vmstate.o vmstate-types.o page_cache.o
54
common-obj-y += qemu-file.o global_state.o
55
common-obj-y += qemu-file-channel.o
56
diff --git a/migration/colo-comm.c b/migration/colo-comm.c
57
deleted file mode 100644
58
index XXXXXXX..XXXXXXX
59
--- a/migration/colo-comm.c
60
+++ /dev/null
61
@@ -XXX,XX +XXX,XX @@
62
-/*
63
- * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
64
- * (a.k.a. Fault Tolerance or Continuous Replication)
65
- *
66
- * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
67
- * Copyright (c) 2016 FUJITSU LIMITED
68
- * Copyright (c) 2016 Intel Corporation
69
- *
70
- * This work is licensed under the terms of the GNU GPL, version 2 or
71
- * later. See the COPYING file in the top-level directory.
72
- *
73
- */
74
-
75
-#include "qemu/osdep.h"
76
-#include "migration.h"
77
-#include "migration/colo.h"
78
-#include "migration/vmstate.h"
79
-#include "trace.h"
80
-
81
-typedef struct {
82
- bool colo_requested;
83
-} COLOInfo;
84
-
85
-static COLOInfo colo_info;
86
-
87
-COLOMode get_colo_mode(void)
88
-{
89
- if (migration_in_colo_state()) {
90
- return COLO_MODE_PRIMARY;
91
- } else if (migration_incoming_in_colo_state()) {
92
- return COLO_MODE_SECONDARY;
93
- } else {
94
- return COLO_MODE_UNKNOWN;
95
- }
96
-}
97
-
98
-static int colo_info_pre_save(void *opaque)
99
-{
100
- COLOInfo *s = opaque;
101
-
102
- s->colo_requested = migrate_colo_enabled();
103
-
104
- return 0;
105
-}
106
-
107
-static bool colo_info_need(void *opaque)
108
-{
109
- return migrate_colo_enabled();
110
-}
111
-
112
-static const VMStateDescription colo_state = {
113
- .name = "COLOState",
114
- .version_id = 1,
115
- .minimum_version_id = 1,
116
- .pre_save = colo_info_pre_save,
117
- .needed = colo_info_need,
118
- .fields = (VMStateField[]) {
119
- VMSTATE_BOOL(colo_requested, COLOInfo),
120
- VMSTATE_END_OF_LIST()
121
- },
122
-};
123
-
124
-void colo_info_init(void)
125
-{
126
- vmstate_register(NULL, 0, &colo_state, &colo_info);
127
-}
128
-
129
-bool migration_incoming_enable_colo(void)
130
-{
131
- return colo_info.colo_requested;
132
-}
133
-
134
-void migration_incoming_exit_colo(void)
135
-{
136
- colo_info.colo_requested = false;
137
-}
138
diff --git a/migration/colo.c b/migration/colo.c
139
index XXXXXXX..XXXXXXX 100644
140
--- a/migration/colo.c
141
+++ b/migration/colo.c
142
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
143
qemu_sem_post(&s->colo_exit_sem);
144
}
145
146
+COLOMode get_colo_mode(void)
147
+{
148
+ if (migration_in_colo_state()) {
149
+ return COLO_MODE_PRIMARY;
150
+ } else if (migration_incoming_in_colo_state()) {
151
+ return COLO_MODE_SECONDARY;
152
+ } else {
153
+ return COLO_MODE_UNKNOWN;
154
+ }
155
+}
156
+
157
void colo_do_failover(MigrationState *s)
158
{
159
/* Make sure VM stopped while failover happened. */
160
@@ -XXX,XX +XXX,XX @@ out:
161
if (mis->to_src_file) {
162
qemu_fclose(mis->to_src_file);
163
}
164
- migration_incoming_exit_colo();
165
+ migration_incoming_disable_colo();
166
167
rcu_unregister_thread();
168
return NULL;
169
diff --git a/migration/migration.c b/migration/migration.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/migration/migration.c
172
+++ b/migration/migration.c
173
@@ -XXX,XX +XXX,XX @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
174
return migrate_send_rp_message(mis, msg_type, msglen, bufc);
175
}
176
177
+static bool migration_colo_enabled;
178
+bool migration_incoming_colo_enabled(void)
179
+{
180
+ return migration_colo_enabled;
181
+}
182
+
183
+void migration_incoming_disable_colo(void)
184
+{
185
+ migration_colo_enabled = false;
186
+}
187
+
188
+void migration_incoming_enable_colo(void)
189
+{
190
+ migration_colo_enabled = true;
191
+}
192
+
193
void qemu_start_incoming_migration(const char *uri, Error **errp)
194
{
195
const char *p;
196
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
197
}
198
199
/* we get COLO info, and know if we are in COLO mode */
200
- if (!ret && migration_incoming_enable_colo()) {
201
+ if (!ret && migration_incoming_colo_enabled()) {
202
/* Make sure all file formats flush their mutable metadata */
203
bdrv_invalidate_cache_all(&local_err);
204
if (local_err) {
205
@@ -XXX,XX +XXX,XX @@ static void *migration_thread(void *opaque)
206
qemu_savevm_send_postcopy_advise(s->to_dst_file);
207
}
208
209
+ if (migrate_colo_enabled()) {
210
+ /* Notify migration destination that we enable COLO */
211
+ qemu_savevm_send_colo_enable(s->to_dst_file);
212
+ }
213
+
214
qemu_savevm_state_setup(s->to_dst_file);
215
216
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
217
diff --git a/migration/savevm.c b/migration/savevm.c
218
index XXXXXXX..XXXXXXX 100644
219
--- a/migration/savevm.c
220
+++ b/migration/savevm.c
221
@@ -XXX,XX +XXX,XX @@
222
#include "io/channel-file.h"
223
#include "sysemu/replay.h"
224
#include "qjson.h"
225
+#include "migration/colo.h"
226
227
#ifndef ETH_P_RARP
228
#define ETH_P_RARP 0x8035
229
@@ -XXX,XX +XXX,XX @@ enum qemu_vm_cmd {
230
were previously sent during
231
precopy but are dirty. */
232
MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */
233
+ MIG_CMD_ENABLE_COLO, /* Enable COLO */
234
MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */
235
MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */
236
MIG_CMD_MAX
237
@@ -XXX,XX +XXX,XX @@ static void qemu_savevm_command_send(QEMUFile *f,
238
qemu_fflush(f);
239
}
240
241
+void qemu_savevm_send_colo_enable(QEMUFile *f)
242
+{
243
+ trace_savevm_send_colo_enable();
244
+ qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
245
+}
246
+
247
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
248
{
249
uint32_t buf;
250
@@ -XXX,XX +XXX,XX @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
251
return 0;
252
}
253
254
+static int loadvm_process_enable_colo(MigrationIncomingState *mis)
255
+{
256
+ migration_incoming_enable_colo();
257
+ return 0;
258
+}
259
+
260
/*
261
* Process an incoming 'QEMU_VM_COMMAND'
262
* 0 just a normal return
263
@@ -XXX,XX +XXX,XX @@ static int loadvm_process_command(QEMUFile *f)
264
265
case MIG_CMD_RECV_BITMAP:
266
return loadvm_handle_recv_bitmap(mis, len);
267
+
268
+ case MIG_CMD_ENABLE_COLO:
269
+ return loadvm_process_enable_colo(mis);
270
}
271
272
return 0;
273
diff --git a/migration/savevm.h b/migration/savevm.h
274
index XXXXXXX..XXXXXXX 100644
275
--- a/migration/savevm.h
276
+++ b/migration/savevm.h
277
@@ -XXX,XX +XXX,XX @@ void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
278
uint16_t len,
279
uint64_t *start_list,
280
uint64_t *length_list);
281
+void qemu_savevm_send_colo_enable(QEMUFile *f);
282
283
int qemu_loadvm_state(QEMUFile *f);
284
void qemu_loadvm_state_cleanup(void);
285
diff --git a/migration/trace-events b/migration/trace-events
286
index XXXXXXX..XXXXXXX 100644
287
--- a/migration/trace-events
288
+++ b/migration/trace-events
289
@@ -XXX,XX +XXX,XX @@ savevm_send_ping(uint32_t val) "0x%x"
290
savevm_send_postcopy_listen(void) ""
291
savevm_send_postcopy_run(void) ""
292
savevm_send_postcopy_resume(void) ""
293
+savevm_send_colo_enable(void) ""
294
savevm_send_recv_bitmap(char *name) "%s"
295
savevm_state_setup(void) ""
296
savevm_state_resume_prepare(void) ""
297
diff --git a/vl.c b/vl.c
298
index XXXXXXX..XXXXXXX 100644
299
--- a/vl.c
300
+++ b/vl.c
301
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
302
#endif
303
}
304
305
- colo_info_init();
306
-
307
if (net_init_clients(&err) < 0) {
308
error_report_err(err);
309
exit(1);
310
--
311
2.5.0
312
313
diff view generated by jsdifflib
New patch
1
1
From: Zhang Chen <zhangckid@gmail.com>
2
3
We should not load PVM's state directly into SVM, because there may be some
4
errors that happen while the SVM is receiving data, which will break the SVM.
5
6
We need to ensure all data is received before loading the state into the SVM. We use
7
extra memory to cache this data (PVM's ram). The ram cache on the secondary side
8
is initially the same as SVM/PVM's memory. And in the process of checkpoint,
9
we first cache the dirty pages of PVM into this ram cache, so this ram cache
10
is always the same as PVM's memory at every checkpoint; then we flush this cached ram
11
to SVM after we receive all PVM's state.
12
13
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
14
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
15
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
16
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
17
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
18
Signed-off-by: Jason Wang <jasowang@redhat.com>
19
---
20
include/exec/ram_addr.h | 1 +
21
migration/migration.c | 7 +++++
22
migration/ram.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++--
23
migration/ram.h | 4 +++
24
migration/savevm.c | 2 +-
25
5 files changed, 94 insertions(+), 3 deletions(-)
26
27
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/exec/ram_addr.h
30
+++ b/include/exec/ram_addr.h
31
@@ -XXX,XX +XXX,XX @@ struct RAMBlock {
32
struct rcu_head rcu;
33
struct MemoryRegion *mr;
34
uint8_t *host;
35
+ uint8_t *colo_cache; /* For colo, VM's ram cache */
36
ram_addr_t offset;
37
ram_addr_t used_length;
38
ram_addr_t max_length;
39
diff --git a/migration/migration.c b/migration/migration.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/migration/migration.c
42
+++ b/migration/migration.c
43
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
44
exit(EXIT_FAILURE);
45
}
46
47
+ if (colo_init_ram_cache() < 0) {
48
+ error_report("Init ram cache failed");
49
+ exit(EXIT_FAILURE);
50
+ }
51
+
52
qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
53
colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
54
mis->have_colo_incoming_thread = true;
55
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
56
57
/* Wait checkpoint incoming thread exit before free resource */
58
qemu_thread_join(&mis->colo_incoming_thread);
59
+ /* We hold the global iothread lock, so it is safe here */
60
+ colo_release_ram_cache();
61
}
62
63
if (ret < 0) {
64
diff --git a/migration/ram.c b/migration/ram.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/migration/ram.c
67
+++ b/migration/ram.c
68
@@ -XXX,XX +XXX,XX @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
69
return block->host + offset;
70
}
71
72
+static inline void *colo_cache_from_block_offset(RAMBlock *block,
73
+ ram_addr_t offset)
74
+{
75
+ if (!offset_in_ramblock(block, offset)) {
76
+ return NULL;
77
+ }
78
+ if (!block->colo_cache) {
79
+ error_report("%s: colo_cache is NULL in block :%s",
80
+ __func__, block->idstr);
81
+ return NULL;
82
+ }
83
+ return block->colo_cache + offset;
84
+}
85
+
86
/**
87
* ram_handle_compressed: handle the zero page case
88
*
89
@@ -XXX,XX +XXX,XX @@ static void decompress_data_with_multi_threads(QEMUFile *f,
90
qemu_mutex_unlock(&decomp_done_lock);
91
}
92
93
+/*
94
+ * colo cache: this is for secondary VM, we cache the whole
95
+ * memory of the secondary VM, it is need to hold the global lock
96
+ * to call this helper.
97
+ */
98
+int colo_init_ram_cache(void)
99
+{
100
+ RAMBlock *block;
101
+
102
+ rcu_read_lock();
103
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
104
+ block->colo_cache = qemu_anon_ram_alloc(block->used_length,
105
+ NULL,
106
+ false);
107
+ if (!block->colo_cache) {
108
+ error_report("%s: Can't alloc memory for COLO cache of block %s,"
109
+ "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
110
+ block->used_length);
111
+ goto out_locked;
112
+ }
113
+ memcpy(block->colo_cache, block->host, block->used_length);
114
+ }
115
+ rcu_read_unlock();
116
+ return 0;
117
+
118
+out_locked:
119
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
120
+ if (block->colo_cache) {
121
+ qemu_anon_ram_free(block->colo_cache, block->used_length);
122
+ block->colo_cache = NULL;
123
+ }
124
+ }
125
+
126
+ rcu_read_unlock();
127
+ return -errno;
128
+}
129
+
130
+/* It is need to hold the global lock to call this helper */
131
+void colo_release_ram_cache(void)
132
+{
133
+ RAMBlock *block;
134
+
135
+ rcu_read_lock();
136
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
137
+ if (block->colo_cache) {
138
+ qemu_anon_ram_free(block->colo_cache, block->used_length);
139
+ block->colo_cache = NULL;
140
+ }
141
+ }
142
+ rcu_read_unlock();
143
+}
144
+
145
/**
146
* ram_load_setup: Setup RAM for migration incoming side
147
*
148
@@ -XXX,XX +XXX,XX @@ static int ram_load_setup(QEMUFile *f, void *opaque)
149
150
xbzrle_load_setup();
151
ramblock_recv_map_init();
152
+
153
return 0;
154
}
155
156
@@ -XXX,XX +XXX,XX @@ static int ram_load_cleanup(void *opaque)
157
g_free(rb->receivedmap);
158
rb->receivedmap = NULL;
159
}
160
+
161
return 0;
162
}
163
164
@@ -XXX,XX +XXX,XX @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
165
RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
166
RAMBlock *block = ram_block_from_stream(f, flags);
167
168
- host = host_from_ram_block_offset(block, addr);
169
+ /*
170
+ * After going into COLO, we should load the Page into colo_cache.
171
+ */
172
+ if (migration_incoming_in_colo_state()) {
173
+ host = colo_cache_from_block_offset(block, addr);
174
+ } else {
175
+ host = host_from_ram_block_offset(block, addr);
176
+ }
177
if (!host) {
178
error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
179
ret = -EINVAL;
180
break;
181
}
182
- ramblock_recv_bitmap_set(block, host);
183
+
184
+ if (!migration_incoming_in_colo_state()) {
185
+ ramblock_recv_bitmap_set(block, host);
186
+ }
187
+
188
trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
189
}
190
191
diff --git a/migration/ram.h b/migration/ram.h
192
index XXXXXXX..XXXXXXX 100644
193
--- a/migration/ram.h
194
+++ b/migration/ram.h
195
@@ -XXX,XX +XXX,XX @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
196
const char *block_name);
197
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
198
199
+/* ram cache */
200
+int colo_init_ram_cache(void);
201
+void colo_release_ram_cache(void);
202
+
203
#endif
204
diff --git a/migration/savevm.c b/migration/savevm.c
205
index XXXXXXX..XXXXXXX 100644
206
--- a/migration/savevm.c
207
+++ b/migration/savevm.c
208
@@ -XXX,XX +XXX,XX @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
209
static int loadvm_process_enable_colo(MigrationIncomingState *mis)
210
{
211
migration_incoming_enable_colo();
212
- return 0;
213
+ return colo_init_ram_cache();
214
}
215
216
/*
217
--
218
2.5.0
219
220
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <zhangckid@gmail.com>
1
2
3
We record the addresses of the dirty pages that are received,
4
which will help flush the cached pages into the SVM.
5
6
Here is a trick: we record dirty pages by re-using the migration
7
dirty bitmap. In a later patch, we will start the dirty log
8
for SVM, just like migration, in this way, we can record both
9
the dirty pages caused by PVM and SVM, we only flush those dirty
10
pages from the RAM cache while doing a checkpoint.
11
12
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
13
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
14
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
15
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
18
migration/ram.c | 43 ++++++++++++++++++++++++++++++++++++++++---
19
1 file changed, 40 insertions(+), 3 deletions(-)
20
21
diff --git a/migration/ram.c b/migration/ram.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/migration/ram.c
24
+++ b/migration/ram.c
25
@@ -XXX,XX +XXX,XX @@ static inline void *colo_cache_from_block_offset(RAMBlock *block,
26
__func__, block->idstr);
27
return NULL;
28
}
29
+
30
+ /*
31
+ * During colo checkpoint, we need bitmap of these migrated pages.
32
+ * It helps us to decide which pages in ram cache should be flushed
33
+ * into VM's RAM later.
34
+ */
35
+ if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
36
+ ram_state->migration_dirty_pages++;
37
+ }
38
return block->colo_cache + offset;
39
}
40
41
@@ -XXX,XX +XXX,XX @@ int colo_init_ram_cache(void)
42
RAMBlock *block;
43
44
rcu_read_lock();
45
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
46
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
47
block->colo_cache = qemu_anon_ram_alloc(block->used_length,
48
NULL,
49
false);
50
@@ -XXX,XX +XXX,XX @@ int colo_init_ram_cache(void)
51
memcpy(block->colo_cache, block->host, block->used_length);
52
}
53
rcu_read_unlock();
54
+ /*
55
+ * Record the dirty pages that were sent by PVM; we use this dirty bitmap
56
+ * to decide which pages in the cache should be flushed into SVM's RAM. Here
57
+ * we use the same name 'ram_bitmap' as for migration.
58
+ */
59
+ if (ram_bytes_total()) {
60
+ RAMBlock *block;
61
+
62
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
63
+ unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
64
+
65
+ block->bmap = bitmap_new(pages);
66
+ bitmap_set(block->bmap, 0, pages);
67
+ }
68
+ }
69
+ ram_state = g_new0(RAMState, 1);
70
+ ram_state->migration_dirty_pages = 0;
71
+
72
return 0;
73
74
out_locked:
75
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
76
+
77
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
78
if (block->colo_cache) {
79
qemu_anon_ram_free(block->colo_cache, block->used_length);
80
block->colo_cache = NULL;
81
@@ -XXX,XX +XXX,XX @@ void colo_release_ram_cache(void)
82
{
83
RAMBlock *block;
84
85
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
86
+ g_free(block->bmap);
87
+ block->bmap = NULL;
88
+ }
89
+
90
rcu_read_lock();
91
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
92
+
93
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
94
if (block->colo_cache) {
95
qemu_anon_ram_free(block->colo_cache, block->used_length);
96
block->colo_cache = NULL;
97
}
98
}
99
+
100
rcu_read_unlock();
101
+ g_free(ram_state);
102
+ ram_state = NULL;
103
}
104
105
/**
106
--
107
2.5.0
108
109
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Zhang Chen <zhangckid@gmail.com>
2
2
3
We add the vnet_hdr_support option for filter-mirror, default is disabled.
3
During the time of VM's running, PVM may dirty some pages, we will transfer
4
If you use virtio-net-pci or other driver needs vnet_hdr, please enable it.
4
PVM's dirty pages to SVM and store them into SVM's RAM cache at next checkpoint
5
You can use it for example:
5
time. So, the content of SVM's RAM cache will always be same with PVM's memory
6
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
6
after checkpoint.
7
7
8
If it has vnet_hdr_support flag, we will change the sending packet format from
8
Instead of flushing all content of PVM's RAM cache into SVM's MEMORY,
9
struct {int size; const uint8_t buf[];} to {int size; int vnet_hdr_len; const uint8_t buf[];}.
9
we do this in a more efficient way:
10
make other module(like colo-compare) know how to parse net packet correctly.
10
Only flush the pages that were dirtied by PVM since the last checkpoint.
11
In this way, we can ensure SVM's memory same with PVM's.
11
12
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
13
Besides, we must ensure the RAM cache is flushed before loading the device state.
14
15
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
16
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
17
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
19
---
15
net/filter-mirror.c | 42 +++++++++++++++++++++++++++++++++++++++++-
20
migration/ram.c | 37 +++++++++++++++++++++++++++++++++++++
16
qemu-options.hx | 5 ++---
21
migration/trace-events | 2 ++
17
2 files changed, 43 insertions(+), 4 deletions(-)
22
2 files changed, 39 insertions(+)
18
23
19
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
24
diff --git a/migration/ram.c b/migration/ram.c
20
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
21
--- a/net/filter-mirror.c
26
--- a/migration/ram.c
22
+++ b/net/filter-mirror.c
27
+++ b/migration/ram.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
28
@@ -XXX,XX +XXX,XX @@ static bool postcopy_is_running(void)
24
CharBackend chr_in;
29
return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
25
CharBackend chr_out;
30
}
26
SocketReadState rs;
31
27
+ bool vnet_hdr;
32
+/*
28
} MirrorState;
33
+ * Flush content of RAM cache into SVM's memory.
29
34
+ * Only flush the pages that were dirtied by PVM or SVM or both.
30
static int filter_send(MirrorState *s,
35
+ */
31
const struct iovec *iov,
36
+static void colo_flush_ram_cache(void)
32
int iovcnt)
37
+{
33
{
38
+ RAMBlock *block = NULL;
34
+ NetFilterState *nf = NETFILTER(s);
39
+ void *dst_host;
35
int ret = 0;
40
+ void *src_host;
36
ssize_t size = 0;
41
+ unsigned long offset = 0;
37
uint32_t len = 0;
38
@@ -XXX,XX +XXX,XX @@ static int filter_send(MirrorState *s,
39
goto err;
40
}
41
42
+ if (s->vnet_hdr) {
43
+ /*
44
+ * If vnet_hdr = on, we send vnet header len to make other
45
+ * module(like colo-compare) know how to parse net
46
+ * packet correctly.
47
+ */
48
+ ssize_t vnet_hdr_len;
49
+
42
+
50
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
43
+ trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
44
+ rcu_read_lock();
45
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
51
+
46
+
52
+ len = htonl(vnet_hdr_len);
47
+ while (block) {
53
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
48
+ offset = migration_bitmap_find_dirty(ram_state, block, offset);
54
+ if (ret != sizeof(len)) {
49
+
55
+ goto err;
50
+ if (offset << TARGET_PAGE_BITS >= block->used_length) {
51
+ offset = 0;
52
+ block = QLIST_NEXT_RCU(block, next);
53
+ } else {
54
+ migration_bitmap_clear_dirty(ram_state, block, offset);
55
+ dst_host = block->host + (offset << TARGET_PAGE_BITS);
56
+ src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
57
+ memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
56
+ }
58
+ }
57
+ }
59
+ }
58
+
60
+
59
buf = g_malloc(size);
61
+ rcu_read_unlock();
60
iov_to_buf(iov, iovcnt, 0, buf, size);
62
+ trace_colo_flush_ram_cache_end();
61
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
62
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
63
}
64
}
65
66
- net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
67
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr);
68
69
if (s->indev) {
70
chr = qemu_chr_find(s->indev);
71
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_set_outdev(Object *obj,
72
}
73
}
74
75
+static bool filter_mirror_get_vnet_hdr(Object *obj, Error **errp)
76
+{
77
+ MirrorState *s = FILTER_MIRROR(obj);
78
+
79
+ return s->vnet_hdr;
80
+}
63
+}
81
+
64
+
82
+static void filter_mirror_set_vnet_hdr(Object *obj, bool value, Error **errp)
65
static int ram_load(QEMUFile *f, void *opaque, int version_id)
83
+{
66
{
84
+ MirrorState *s = FILTER_MIRROR(obj);
67
int flags = 0, ret = 0, invalid_flags = 0;
68
@@ -XXX,XX +XXX,XX @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
69
ret |= wait_for_decompress_done();
70
rcu_read_unlock();
71
trace_ram_load_complete(ret, seq_iter);
85
+
72
+
86
+ s->vnet_hdr = value;
73
+ if (!ret && migration_incoming_in_colo_state()) {
87
+}
74
+ colo_flush_ram_cache();
88
+
75
+ }
89
static char *filter_redirector_get_outdev(Object *obj, Error **errp)
76
return ret;
90
{
91
MirrorState *s = FILTER_REDIRECTOR(obj);
92
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
93
94
static void filter_mirror_init(Object *obj)
95
{
96
+ MirrorState *s = FILTER_MIRROR(obj);
97
+
98
object_property_add_str(obj, "outdev", filter_mirror_get_outdev,
99
filter_mirror_set_outdev, NULL);
100
+
101
+ s->vnet_hdr = false;
102
+ object_property_add_bool(obj, "vnet_hdr_support",
103
+ filter_mirror_get_vnet_hdr,
104
+ filter_mirror_set_vnet_hdr, NULL);
105
}
77
}
106
78
107
static void filter_redirector_init(Object *obj)
79
diff --git a/migration/trace-events b/migration/trace-events
108
diff --git a/qemu-options.hx b/qemu-options.hx
109
index XXXXXXX..XXXXXXX 100644
80
index XXXXXXX..XXXXXXX 100644
110
--- a/qemu-options.hx
81
--- a/migration/trace-events
111
+++ b/qemu-options.hx
82
+++ b/migration/trace-events
112
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
83
@@ -XXX,XX +XXX,XX @@ ram_dirty_bitmap_sync_start(void) ""
113
@option{tx}: the filter is attached to the transmit queue of the netdev,
84
ram_dirty_bitmap_sync_wait(void) ""
114
where it will receive packets sent by the netdev.
85
ram_dirty_bitmap_sync_complete(void) ""
115
86
ram_state_resume_prepare(uint64_t v) "%" PRId64
116
-@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
87
+colo_flush_ram_cache_begin(uint64_t dirty_pages) "dirty_pages %" PRIu64
117
+@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
88
+colo_flush_ram_cache_end(void) ""
118
89
119
-filter-mirror on netdev @var{netdevid},mirror net packet to chardev
90
# migration/migration.c
120
-@var{chardevid}
91
await_return_path_close_on_source_close(void) ""
121
+filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
122
123
@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
124
outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
125
--
92
--
126
2.7.4
93
2.5.0
127
94
128
95
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
2
2
3
This patch change the filter_send() parameter from CharBackend to MirrorState,
3
If some errors happen during VM's COLO FT stage, it's important to
4
we can get more information like vnet_hdr(We use it to support packet with vnet_header).
4
notify the users of this event. Together with 'x-colo-lost-heartbeat',
5
Users can intervene in COLO's failover work immediately.
6
If users don't want to get involved in COLO's failover verdict,
7
it is still necessary to notify users that we exited COLO mode.
5
8
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
11
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
12
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
14
---
9
net/filter-mirror.c | 10 +++++-----
15
migration/colo.c | 31 +++++++++++++++++++++++++++++++
10
1 file changed, 5 insertions(+), 5 deletions(-)
16
qapi/migration.json | 38 ++++++++++++++++++++++++++++++++++++++
17
2 files changed, 69 insertions(+)
11
18
12
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
19
diff --git a/migration/colo.c b/migration/colo.c
13
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
14
--- a/net/filter-mirror.c
21
--- a/migration/colo.c
15
+++ b/net/filter-mirror.c
22
+++ b/migration/colo.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
23
@@ -XXX,XX +XXX,XX @@
17
SocketReadState rs;
24
#include "net/colo-compare.h"
18
} MirrorState;
25
#include "net/colo.h"
19
26
#include "block/block.h"
20
-static int filter_send(CharBackend *chr_out,
27
+#include "qapi/qapi-events-migration.h"
21
+static int filter_send(MirrorState *s,
28
22
const struct iovec *iov,
29
static bool vmstate_loading;
23
int iovcnt)
30
static Notifier packets_compare_notifier;
24
{
31
@@ -XXX,XX +XXX,XX @@ out:
25
@@ -XXX,XX +XXX,XX @@ static int filter_send(CharBackend *chr_out,
32
qemu_fclose(fb);
26
}
33
}
27
34
28
len = htonl(size);
35
+ /*
29
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)&len, sizeof(len));
36
+ * There are only two reasons we can get here, some error happened
30
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
37
+ * or the user triggered failover.
31
if (ret != sizeof(len)) {
38
+ */
32
goto err;
39
+ switch (failover_get_state()) {
40
+ case FAILOVER_STATUS_NONE:
41
+ qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
42
+ COLO_EXIT_REASON_ERROR);
43
+ break;
44
+ case FAILOVER_STATUS_REQUIRE:
45
+ qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
46
+ COLO_EXIT_REASON_REQUEST);
47
+ break;
48
+ default:
49
+ abort();
50
+ }
51
+
52
/* Hope this not to be too long to wait here */
53
qemu_sem_wait(&s->colo_exit_sem);
54
qemu_sem_destroy(&s->colo_exit_sem);
55
@@ -XXX,XX +XXX,XX @@ out:
56
error_report_err(local_err);
33
}
57
}
34
58
35
buf = g_malloc(size);
59
+ switch (failover_get_state()) {
36
iov_to_buf(iov, iovcnt, 0, buf, size);
60
+ case FAILOVER_STATUS_NONE:
37
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)buf, size);
61
+ qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
38
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
62
+ COLO_EXIT_REASON_ERROR);
39
g_free(buf);
63
+ break;
40
if (ret != size) {
64
+ case FAILOVER_STATUS_REQUIRE:
41
goto err;
65
+ qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
42
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
66
+ COLO_EXIT_REASON_REQUEST);
43
MirrorState *s = FILTER_MIRROR(nf);
67
+ break;
44
int ret;
68
+ default:
45
69
+ abort();
46
- ret = filter_send(&s->chr_out, iov, iovcnt);
70
+ }
47
+ ret = filter_send(s, iov, iovcnt);
71
+
48
if (ret) {
72
if (fb) {
49
error_report("filter mirror send failed(%s)", strerror(-ret));
73
qemu_fclose(fb);
50
}
74
}
51
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_redirector_receive_iov(NetFilterState *nf,
75
diff --git a/qapi/migration.json b/qapi/migration.json
52
int ret;
76
index XXXXXXX..XXXXXXX 100644
53
77
--- a/qapi/migration.json
54
if (qemu_chr_fe_backend_connected(&s->chr_out)) {
78
+++ b/qapi/migration.json
55
- ret = filter_send(&s->chr_out, iov, iovcnt);
79
@@ -XXX,XX +XXX,XX @@
56
+ ret = filter_send(s, iov, iovcnt);
80
'data': [ 'none', 'require', 'active', 'completed', 'relaunch' ] }
57
if (ret) {
81
58
error_report("filter redirector send failed(%s)", strerror(-ret));
82
##
59
}
83
+# @COLO_EXIT:
84
+#
85
+# Emitted when VM finishes COLO mode due to some errors happening or
86
+# at the request of users.
87
+#
88
+# @mode: report COLO mode when COLO exited.
89
+#
90
+# @reason: describes the reason for the COLO exit.
91
+#
92
+# Since: 3.1
93
+#
94
+# Example:
95
+#
96
+# <- { "timestamp": {"seconds": 2032141960, "microseconds": 417172},
97
+# "event": "COLO_EXIT", "data": {"mode": "primary", "reason": "request" } }
98
+#
99
+##
100
+{ 'event': 'COLO_EXIT',
101
+ 'data': {'mode': 'COLOMode', 'reason': 'COLOExitReason' } }
102
+
103
+##
104
+# @COLOExitReason:
105
+#
106
+# The reason for a COLO exit
107
+#
108
+# @none: no failover has ever happened. This can't occur in the
109
+# COLO_EXIT event, only in the result of query-colo-status.
110
+#
111
+# @request: COLO exit is due to an external request
112
+#
113
+# @error: COLO exit is due to an internal error
114
+#
115
+# Since: 3.1
116
+##
117
+{ 'enum': 'COLOExitReason',
118
+ 'data': [ 'none', 'request', 'error' ] }
119
+
120
+##
121
# @x-colo-lost-heartbeat:
122
#
123
# Tell qemu that heartbeat is lost, request it to do takeover procedures.
60
--
124
--
61
2.7.4
125
2.5.0
62
126
63
127
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <chen.zhang@intel.com>
1
2
3
As suggested by Markus Armbruster, rename the COLO "unknown" mode to "none".
4
5
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
6
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Markus Armbruster <armbru@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
migration/colo-failover.c | 2 +-
12
migration/colo.c | 2 +-
13
qapi/migration.json | 10 +++++-----
14
3 files changed, 7 insertions(+), 7 deletions(-)
15
16
diff --git a/migration/colo-failover.c b/migration/colo-failover.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/migration/colo-failover.c
19
+++ b/migration/colo-failover.c
20
@@ -XXX,XX +XXX,XX @@ FailoverStatus failover_get_state(void)
21
22
void qmp_x_colo_lost_heartbeat(Error **errp)
23
{
24
- if (get_colo_mode() == COLO_MODE_UNKNOWN) {
25
+ if (get_colo_mode() == COLO_MODE_NONE) {
26
error_setg(errp, QERR_FEATURE_DISABLED, "colo");
27
return;
28
}
29
diff --git a/migration/colo.c b/migration/colo.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/migration/colo.c
32
+++ b/migration/colo.c
33
@@ -XXX,XX +XXX,XX @@ COLOMode get_colo_mode(void)
34
} else if (migration_incoming_in_colo_state()) {
35
return COLO_MODE_SECONDARY;
36
} else {
37
- return COLO_MODE_UNKNOWN;
38
+ return COLO_MODE_NONE;
39
}
40
}
41
42
diff --git a/qapi/migration.json b/qapi/migration.json
43
index XXXXXXX..XXXXXXX 100644
44
--- a/qapi/migration.json
45
+++ b/qapi/migration.json
46
@@ -XXX,XX +XXX,XX @@
47
##
48
# @COLOMode:
49
#
50
-# The colo mode
51
+# The COLO current mode.
52
#
53
-# @unknown: unknown mode
54
+# @none: COLO is disabled.
55
#
56
-# @primary: master side
57
+# @primary: COLO node in primary side.
58
#
59
-# @secondary: slave side
60
+# @secondary: COLO node in secondary side.
61
#
62
# Since: 2.8
63
##
64
{ 'enum': 'COLOMode',
65
- 'data': [ 'unknown', 'primary', 'secondary'] }
66
+ 'data': [ 'none', 'primary', 'secondary'] }
67
68
##
69
# @FailoverStatus:
70
--
71
2.5.0
72
73
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <zhangckid@gmail.com>
1
2
3
Libvirt or other high-level software can use this command to query the COLO status.
4
You can test this command like that:
5
{'execute':'query-colo-status'}
6
7
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
migration/colo.c | 21 +++++++++++++++++++++
12
qapi/migration.json | 32 ++++++++++++++++++++++++++++++++
13
2 files changed, 53 insertions(+)
14
15
diff --git a/migration/colo.c b/migration/colo.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/migration/colo.c
18
+++ b/migration/colo.c
19
@@ -XXX,XX +XXX,XX @@
20
#include "net/colo.h"
21
#include "block/block.h"
22
#include "qapi/qapi-events-migration.h"
23
+#include "qapi/qmp/qerror.h"
24
25
static bool vmstate_loading;
26
static Notifier packets_compare_notifier;
27
@@ -XXX,XX +XXX,XX @@ void qmp_xen_colo_do_checkpoint(Error **errp)
28
#endif
29
}
30
31
+COLOStatus *qmp_query_colo_status(Error **errp)
32
+{
33
+ COLOStatus *s = g_new0(COLOStatus, 1);
34
+
35
+ s->mode = get_colo_mode();
36
+
37
+ switch (failover_get_state()) {
38
+ case FAILOVER_STATUS_NONE:
39
+ s->reason = COLO_EXIT_REASON_NONE;
40
+ break;
41
+ case FAILOVER_STATUS_REQUIRE:
42
+ s->reason = COLO_EXIT_REASON_REQUEST;
43
+ break;
44
+ default:
45
+ s->reason = COLO_EXIT_REASON_ERROR;
46
+ }
47
+
48
+ return s;
49
+}
50
+
51
static void colo_send_message(QEMUFile *f, COLOMessage msg,
52
Error **errp)
53
{
54
diff --git a/qapi/migration.json b/qapi/migration.json
55
index XXXXXXX..XXXXXXX 100644
56
--- a/qapi/migration.json
57
+++ b/qapi/migration.json
58
@@ -XXX,XX +XXX,XX @@
59
{ 'command': 'xen-colo-do-checkpoint' }
60
61
##
62
+# @COLOStatus:
63
+#
64
+# The result format for 'query-colo-status'.
65
+#
66
+# @mode: COLO running mode. If COLO is running, this field will return
67
+# 'primary' or 'secondary'.
68
+#
69
+# @reason: describes the reason for the COLO exit.
70
+#
71
+# Since: 3.1
72
+##
73
+{ 'struct': 'COLOStatus',
74
+ 'data': { 'mode': 'COLOMode', 'reason': 'COLOExitReason' } }
75
+
76
+##
77
+# @query-colo-status:
78
+#
79
+# Query COLO status while the vm is running.
80
+#
81
+# Returns: A @COLOStatus object showing the status.
82
+#
83
+# Example:
84
+#
85
+# -> { "execute": "query-colo-status" }
86
+# <- { "return": { "mode": "primary", "reason": "request" } }
87
+#
88
+# Since: 3.1
89
+##
90
+{ 'command': 'query-colo-status',
91
+ 'returns': 'COLOStatus' }
92
+
93
+##
94
# @migrate-recover:
95
#
96
# Provide a recovery migration stream URI.
97
--
98
2.5.0
99
100
diff view generated by jsdifflib
New patch
1
1
From: Zhang Chen <zhangckid@gmail.com>
2
3
There are several stages during loadvm/savevm process. In different stage,
4
migration incoming processes different types of sections.
5
We want to control these stages more accurately; this will benefit COLO
6
performance, we don't have to save type of QEMU_VM_SECTION_START
7
sections every time we do a checkpoint; besides, we want to separate
8
the process of saving/loading memory and devices state.
9
10
So we add two new helper functions: qemu_load_device_state() and
11
qemu_savevm_live_state() to achieve different process during migration.
12
13
Besides, we make qemu_loadvm_state_main() and qemu_save_device_state()
14
public, and simplify the codes of qemu_save_device_state() by calling the
15
wrapper qemu_savevm_state_header().
16
17
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
18
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
19
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
20
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
21
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
22
Signed-off-by: Jason Wang <jasowang@redhat.com>
23
---
24
migration/colo.c | 41 ++++++++++++++++++++++++++++++++---------
25
migration/savevm.c | 36 +++++++++++++++++++++++++++++-------
26
migration/savevm.h | 4 ++++
27
3 files changed, 65 insertions(+), 16 deletions(-)
28
29
diff --git a/migration/colo.c b/migration/colo.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/migration/colo.c
32
+++ b/migration/colo.c
33
@@ -XXX,XX +XXX,XX @@
34
#include "block/block.h"
35
#include "qapi/qapi-events-migration.h"
36
#include "qapi/qmp/qerror.h"
37
+#include "sysemu/cpus.h"
38
39
static bool vmstate_loading;
40
static Notifier packets_compare_notifier;
41
@@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s,
42
43
/* Disable block migration */
44
migrate_set_block_enabled(false, &local_err);
45
- qemu_savevm_state_header(fb);
46
- qemu_savevm_state_setup(fb);
47
qemu_mutex_lock_iothread();
48
replication_do_checkpoint_all(&local_err);
49
if (local_err) {
50
qemu_mutex_unlock_iothread();
51
goto out;
52
}
53
- qemu_savevm_state_complete_precopy(fb, false, false);
54
- qemu_mutex_unlock_iothread();
55
-
56
- qemu_fflush(fb);
57
58
colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
59
if (local_err) {
60
+ qemu_mutex_unlock_iothread();
61
+ goto out;
62
+ }
63
+ /* Note: device state is saved into buffer */
64
+ ret = qemu_save_device_state(fb);
65
+
66
+ qemu_mutex_unlock_iothread();
67
+ if (ret < 0) {
68
goto out;
69
}
70
/*
71
+ * Only save VM's live state, which does not include device state.
72
+ * TODO: We may need a timeout mechanism to prevent COLO process
73
+ * to be blocked here.
74
+ */
75
+ qemu_savevm_live_state(s->to_dst_file);
76
+
77
+ qemu_fflush(fb);
78
+
79
+ /*
80
* We need the size of the VMstate data in Secondary side,
81
* With which we can decide how much data should be read.
82
*/
83
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
84
uint64_t total_size;
85
uint64_t value;
86
Error *local_err = NULL;
87
+ int ret;
88
89
rcu_register_thread();
90
qemu_sem_init(&mis->colo_incoming_sem, 0);
91
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
92
goto out;
93
}
94
95
+ qemu_mutex_lock_iothread();
96
+ cpu_synchronize_all_pre_loadvm();
97
+ ret = qemu_loadvm_state_main(mis->from_src_file, mis);
98
+ qemu_mutex_unlock_iothread();
99
+
100
+ if (ret < 0) {
101
+ error_report("Load VM's live state (ram) error");
102
+ goto out;
103
+ }
104
+
105
value = colo_receive_message_value(mis->from_src_file,
106
COLO_MESSAGE_VMSTATE_SIZE, &local_err);
107
if (local_err) {
108
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
109
}
110
111
qemu_mutex_lock_iothread();
112
- qemu_system_reset(SHUTDOWN_CAUSE_NONE);
113
vmstate_loading = true;
114
- if (qemu_loadvm_state(fb) < 0) {
115
- error_report("COLO: loadvm failed");
116
+ ret = qemu_load_device_state(fb);
117
+ if (ret < 0) {
118
+ error_report("COLO: load device state failed");
119
qemu_mutex_unlock_iothread();
120
goto out;
121
}
122
diff --git a/migration/savevm.c b/migration/savevm.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/migration/savevm.c
125
+++ b/migration/savevm.c
126
@@ -XXX,XX +XXX,XX @@ done:
127
return ret;
128
}
129
130
-static int qemu_save_device_state(QEMUFile *f)
131
+void qemu_savevm_live_state(QEMUFile *f)
132
{
133
- SaveStateEntry *se;
134
+ /* save QEMU_VM_SECTION_END section */
135
+ qemu_savevm_state_complete_precopy(f, true, false);
136
+ qemu_put_byte(f, QEMU_VM_EOF);
137
+}
138
139
- qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
140
- qemu_put_be32(f, QEMU_VM_FILE_VERSION);
141
+int qemu_save_device_state(QEMUFile *f)
142
+{
143
+ SaveStateEntry *se;
144
145
+ if (!migration_in_colo_state()) {
146
+ qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
147
+ qemu_put_be32(f, QEMU_VM_FILE_VERSION);
148
+ }
149
cpu_synchronize_all_states();
150
151
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
152
@@ -XXX,XX +XXX,XX @@ enum LoadVMExitCodes {
153
LOADVM_QUIT = 1,
154
};
155
156
-static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
157
-
158
/* ------ incoming postcopy messages ------ */
159
/* 'advise' arrives before any transfers just to tell us that a postcopy
160
* *might* happen - it might be skipped if precopy transferred everything
161
@@ -XXX,XX +XXX,XX @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
162
return true;
163
}
164
165
-static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
166
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
167
{
168
uint8_t section_type;
169
int ret = 0;
170
@@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f)
171
return ret;
172
}
173
174
+int qemu_load_device_state(QEMUFile *f)
175
+{
176
+ MigrationIncomingState *mis = migration_incoming_get_current();
177
+ int ret;
178
+
179
+ /* Load QEMU_VM_SECTION_FULL section */
180
+ ret = qemu_loadvm_state_main(f, mis);
181
+ if (ret < 0) {
182
+ error_report("Failed to load device state: %d", ret);
183
+ return ret;
184
+ }
185
+
186
+ cpu_synchronize_all_post_init();
187
+ return 0;
188
+}
189
+
190
int save_snapshot(const char *name, Error **errp)
191
{
192
BlockDriverState *bs, *bs1;
193
diff --git a/migration/savevm.h b/migration/savevm.h
194
index XXXXXXX..XXXXXXX 100644
195
--- a/migration/savevm.h
196
+++ b/migration/savevm.h
197
@@ -XXX,XX +XXX,XX @@ void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
198
uint64_t *start_list,
199
uint64_t *length_list);
200
void qemu_savevm_send_colo_enable(QEMUFile *f);
201
+void qemu_savevm_live_state(QEMUFile *f);
202
+int qemu_save_device_state(QEMUFile *f);
203
204
int qemu_loadvm_state(QEMUFile *f);
205
void qemu_loadvm_state_cleanup(void);
206
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
207
+int qemu_load_device_state(QEMUFile *f);
208
209
#endif
210
--
211
2.5.0
212
213
diff view generated by jsdifflib
New patch
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
1
2
3
Don't need to flush all VM's ram from cache, only
4
flush the dirty pages since last checkpoint
5
6
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
7
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
migration/ram.c | 9 +++++++++
14
1 file changed, 9 insertions(+)
15
16
diff --git a/migration/ram.c b/migration/ram.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/migration/ram.c
19
+++ b/migration/ram.c
20
@@ -XXX,XX +XXX,XX @@ int colo_init_ram_cache(void)
21
}
22
ram_state = g_new0(RAMState, 1);
23
ram_state->migration_dirty_pages = 0;
24
+ memory_global_dirty_log_start();
25
26
return 0;
27
28
@@ -XXX,XX +XXX,XX @@ void colo_release_ram_cache(void)
29
{
30
RAMBlock *block;
31
32
+ memory_global_dirty_log_stop();
33
RAMBLOCK_FOREACH_MIGRATABLE(block) {
34
g_free(block->bmap);
35
block->bmap = NULL;
36
@@ -XXX,XX +XXX,XX @@ static void colo_flush_ram_cache(void)
37
void *src_host;
38
unsigned long offset = 0;
39
40
+ memory_global_dirty_log_sync();
41
+ rcu_read_lock();
42
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
43
+ migration_bitmap_sync_range(ram_state, block, 0, block->used_length);
44
+ }
45
+ rcu_read_unlock();
46
+
47
trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
48
rcu_read_lock();
49
block = QLIST_FIRST_RCU(&ram_list.blocks);
50
--
51
2.5.0
52
53
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Zhang Chen <zhangckid@gmail.com>
2
2
3
We add a flag to decide whether net_fill_rstate() need read
3
Filter needs to process the event of checkpoint/failover or
4
the vnet_hdr_len or not.
4
other event passed by COLO frame.
5
5
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
7
Suggested-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
---
10
include/net/net.h | 9 +++++++--
11
include/net/filter.h | 5 +++++
11
net/colo-compare.c | 4 ++--
12
net/filter.c | 17 +++++++++++++++++
12
net/filter-mirror.c | 2 +-
13
net/net.c | 19 +++++++++++++++++++
13
net/net.c | 36 ++++++++++++++++++++++++++++++++----
14
3 files changed, 41 insertions(+)
14
net/socket.c | 8 ++++----
15
5 files changed, 46 insertions(+), 13 deletions(-)
16
15
17
diff --git a/include/net/net.h b/include/net/net.h
16
diff --git a/include/net/filter.h b/include/net/filter.h
18
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
19
--- a/include/net/net.h
18
--- a/include/net/filter.h
20
+++ b/include/net/net.h
19
+++ b/include/net/filter.h
21
@@ -XXX,XX +XXX,XX @@ typedef struct NICState {
20
@@ -XXX,XX +XXX,XX @@ typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc,
22
} NICState;
21
23
22
typedef void (FilterStatusChanged) (NetFilterState *nf, Error **errp);
24
struct SocketReadState {
23
25
- int state; /* 0 = getting length, 1 = getting data */
24
+typedef void (FilterHandleEvent) (NetFilterState *nf, int event, Error **errp);
26
+ /* 0 = getting length, 1 = getting vnet header length, 2 = getting data */
25
+
27
+ int state;
26
typedef struct NetFilterClass {
28
+ /* This flag decide whether to read the vnet_hdr_len field */
27
ObjectClass parent_class;
29
+ bool vnet_hdr;
28
30
uint32_t index;
29
@@ -XXX,XX +XXX,XX @@ typedef struct NetFilterClass {
31
uint32_t packet_len;
30
FilterSetup *setup;
32
+ uint32_t vnet_hdr_len;
31
FilterCleanup *cleanup;
33
uint8_t buf[NET_BUFSIZE];
32
FilterStatusChanged *status_changed;
34
SocketReadStateFinalize *finalize;
33
+ FilterHandleEvent *handle_event;
35
};
34
/* mandatory */
36
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
35
FilterReceiveIOV *receive_iov;
37
void print_net_client(Monitor *mon, NetClientState *nc);
36
} NetFilterClass;
38
void hmp_info_network(Monitor *mon, const QDict *qdict);
37
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
39
void net_socket_rs_init(SocketReadState *rs,
38
int iovcnt,
40
- SocketReadStateFinalize *finalize);
39
void *opaque);
41
+ SocketReadStateFinalize *finalize,
40
42
+ bool vnet_hdr);
41
+void colo_notify_filters_event(int event, Error **errp);
43
42
+
44
/* NIC info */
43
#endif /* QEMU_NET_FILTER_H */
45
44
diff --git a/net/filter.c b/net/filter.c
46
diff --git a/net/colo-compare.c b/net/colo-compare.c
47
index XXXXXXX..XXXXXXX 100644
45
index XXXXXXX..XXXXXXX 100644
48
--- a/net/colo-compare.c
46
--- a/net/filter.c
49
+++ b/net/colo-compare.c
47
+++ b/net/filter.c
50
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
48
@@ -XXX,XX +XXX,XX @@
51
return;
49
#include "net/vhost_net.h"
52
}
50
#include "qom/object_interfaces.h"
53
51
#include "qemu/iov.h"
54
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize);
52
+#include "net/colo.h"
55
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize);
53
+#include "migration/colo.h"
56
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
54
57
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
55
static inline bool qemu_can_skip_netfilter(NetFilterState *nf)
58
56
{
59
g_queue_init(&s->conn_list);
57
@@ -XXX,XX +XXX,XX @@ static void netfilter_finalize(Object *obj)
60
58
g_free(nf->netdev_id);
61
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
59
}
62
index XXXXXXX..XXXXXXX 100644
60
63
--- a/net/filter-mirror.c
61
+static void default_handle_event(NetFilterState *nf, int event, Error **errp)
64
+++ b/net/filter-mirror.c
62
+{
65
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
63
+ switch (event) {
66
}
64
+ case COLO_EVENT_CHECKPOINT:
67
}
65
+ break;
68
66
+ case COLO_EVENT_FAILOVER:
69
- net_socket_rs_init(&s->rs, redirector_rs_finalize);
67
+ object_property_set_str(OBJECT(nf), "off", "status", errp);
70
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
68
+ break;
71
69
+ default:
72
if (s->indev) {
70
+ break;
73
chr = qemu_chr_find(s->indev);
71
+ }
72
+}
73
+
74
static void netfilter_class_init(ObjectClass *oc, void *data)
75
{
76
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
77
+ NetFilterClass *nfc = NETFILTER_CLASS(oc);
78
79
ucc->complete = netfilter_complete;
80
+ nfc->handle_event = default_handle_event;
81
}
82
83
static const TypeInfo netfilter_info = {
74
diff --git a/net/net.c b/net/net.c
84
diff --git a/net/net.c b/net/net.c
75
index XXXXXXX..XXXXXXX 100644
85
index XXXXXXX..XXXXXXX 100644
76
--- a/net/net.c
86
--- a/net/net.c
77
+++ b/net/net.c
87
+++ b/net/net.c
78
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_net_opts = {
88
@@ -XXX,XX +XXX,XX @@ void hmp_info_network(Monitor *mon, const QDict *qdict)
79
};
89
}
80
90
}
81
void net_socket_rs_init(SocketReadState *rs,
91
82
- SocketReadStateFinalize *finalize)
92
+void colo_notify_filters_event(int event, Error **errp)
83
+ SocketReadStateFinalize *finalize,
93
+{
84
+ bool vnet_hdr)
94
+ NetClientState *nc;
95
+ NetFilterState *nf;
96
+ NetFilterClass *nfc = NULL;
97
+ Error *local_err = NULL;
98
+
99
+ QTAILQ_FOREACH(nc, &net_clients, next) {
100
+ QTAILQ_FOREACH(nf, &nc->filters, next) {
101
+ nfc = NETFILTER_GET_CLASS(OBJECT(nf));
102
+ nfc->handle_event(nf, event, &local_err);
103
+ if (local_err) {
104
+ error_propagate(errp, local_err);
105
+ return;
106
+ }
107
+ }
108
+ }
109
+}
110
+
111
void qmp_set_link(const char *name, bool up, Error **errp)
85
{
112
{
86
rs->state = 0;
113
NetClientState *ncs[MAX_QUEUE_NUM];
87
+ rs->vnet_hdr = vnet_hdr;
88
rs->index = 0;
89
rs->packet_len = 0;
90
+ rs->vnet_hdr_len = 0;
91
memset(rs->buf, 0, sizeof(rs->buf));
92
rs->finalize = finalize;
93
}
94
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
95
unsigned int l;
96
97
while (size > 0) {
98
- /* reassemble a packet from the network */
99
- switch (rs->state) { /* 0 = getting length, 1 = getting data */
100
+ /* Reassemble a packet from the network.
101
+ * 0 = getting length.
102
+ * 1 = getting vnet header length.
103
+ * 2 = getting data.
104
+ */
105
+ switch (rs->state) {
106
case 0:
107
l = 4 - rs->index;
108
if (l > size) {
109
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
110
/* got length */
111
rs->packet_len = ntohl(*(uint32_t *)rs->buf);
112
rs->index = 0;
113
- rs->state = 1;
114
+ if (rs->vnet_hdr) {
115
+ rs->state = 1;
116
+ } else {
117
+ rs->state = 2;
118
+ rs->vnet_hdr_len = 0;
119
+ }
120
}
121
break;
122
case 1:
123
+ l = 4 - rs->index;
124
+ if (l > size) {
125
+ l = size;
126
+ }
127
+ memcpy(rs->buf + rs->index, buf, l);
128
+ buf += l;
129
+ size -= l;
130
+ rs->index += l;
131
+ if (rs->index == 4) {
132
+ /* got vnet header length */
133
+ rs->vnet_hdr_len = ntohl(*(uint32_t *)rs->buf);
134
+ rs->index = 0;
135
+ rs->state = 2;
136
+ }
137
+ break;
138
+ case 2:
139
l = rs->packet_len - rs->index;
140
if (l > size) {
141
l = size;
142
diff --git a/net/socket.c b/net/socket.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/net/socket.c
145
+++ b/net/socket.c
146
@@ -XXX,XX +XXX,XX @@ static void net_socket_send(void *opaque)
147
closesocket(s->fd);
148
149
s->fd = -1;
150
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
151
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
152
s->nc.link_down = true;
153
memset(s->nc.info_str, 0, sizeof(s->nc.info_str));
154
155
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer,
156
s->fd = fd;
157
s->listen_fd = -1;
158
s->send_fn = net_socket_send_dgram;
159
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
160
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
161
net_socket_read_poll(s, true);
162
163
/* mcast: save bound address as dst */
164
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer,
165
166
s->fd = fd;
167
s->listen_fd = -1;
168
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
169
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
170
171
/* Disable Nagle algorithm on TCP sockets to reduce latency */
172
socket_set_nodelay(fd);
173
@@ -XXX,XX +XXX,XX @@ static int net_socket_listen_init(NetClientState *peer,
174
s->fd = -1;
175
s->listen_fd = fd;
176
s->nc.link_down = true;
177
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
178
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
179
180
qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s);
181
return 0;
182
--
114
--
183
2.7.4
115
2.5.0
184
116
185
117
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Zhang Chen <zhangckid@gmail.com>
2
2
3
We can use this property to flush and send packets with vnet_hdr_len.
3
After one round of checkpoint, the states between PVM and SVM
4
become consistent, so it is unnecessary to adjust the sequence
5
of net packets for old connections, besides, while failover
6
happens, filter-rewriter will enter failover mode, where it needn't
7
handle the new TCP connection.
4
8
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
11
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
13
---
8
net/colo-compare.c | 8 ++++++--
14
net/colo-compare.c | 12 +++++------
9
net/colo.c | 3 ++-
15
net/colo.c | 8 ++++++++
10
net/colo.h | 4 +++-
16
net/colo.h | 2 ++
11
net/filter-rewriter.c | 2 +-
17
net/filter-rewriter.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++
12
4 files changed, 12 insertions(+), 5 deletions(-)
18
4 files changed, 73 insertions(+), 6 deletions(-)
13
19
14
diff --git a/net/colo-compare.c b/net/colo-compare.c
20
diff --git a/net/colo-compare.c b/net/colo-compare.c
15
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
16
--- a/net/colo-compare.c
22
--- a/net/colo-compare.c
17
+++ b/net/colo-compare.c
23
+++ b/net/colo-compare.c
18
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode)
24
@@ -XXX,XX +XXX,XX @@ enum {
19
Connection *conn;
25
SECONDARY_IN,
20
26
};
21
if (mode == PRIMARY_IN) {
27
22
- pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len);
28
+static void colo_compare_inconsistency_notify(void)
23
+ pkt = packet_new(s->pri_rs.buf,
29
+{
24
+ s->pri_rs.packet_len,
30
+ notifier_list_notify(&colo_compare_notifiers,
25
+ s->pri_rs.vnet_hdr_len);
31
+ migrate_get_current());
26
} else {
32
+}
27
- pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len);
33
+
28
+ pkt = packet_new(s->sec_rs.buf,
34
static int compare_chr_send(CompareState *s,
29
+ s->sec_rs.packet_len,
35
const uint8_t *buf,
30
+ s->sec_rs.vnet_hdr_len);
36
uint32_t size,
31
}
37
@@ -XXX,XX +XXX,XX @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
32
38
return false;
33
if (parse_packet_early(pkt)) {
39
}
40
41
-static void colo_compare_inconsistency_notify(void)
42
-{
43
- notifier_list_notify(&colo_compare_notifiers,
44
- migrate_get_current());
45
-}
46
-
47
static void colo_compare_tcp(CompareState *s, Connection *conn)
48
{
49
Packet *ppkt = NULL, *spkt = NULL;
34
diff --git a/net/colo.c b/net/colo.c
50
diff --git a/net/colo.c b/net/colo.c
35
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
36
--- a/net/colo.c
52
--- a/net/colo.c
37
+++ b/net/colo.c
53
+++ b/net/colo.c
38
@@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque)
54
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
39
g_slice_free(Connection, conn);
55
56
return conn;
40
}
57
}
41
58
+
42
-Packet *packet_new(const void *data, int size)
59
+bool connection_has_tracked(GHashTable *connection_track_table,
43
+Packet *packet_new(const void *data, int size, int vnet_hdr_len)
60
+ ConnectionKey *key)
44
{
61
+{
45
Packet *pkt = g_slice_new(Packet);
62
+ Connection *conn = g_hash_table_lookup(connection_track_table, key);
46
63
+
47
pkt->data = g_memdup(data, size);
64
+ return conn ? true : false;
48
pkt->size = size;
65
+}
49
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
50
+ pkt->vnet_hdr_len = vnet_hdr_len;
51
52
return pkt;
53
}
54
diff --git a/net/colo.h b/net/colo.h
66
diff --git a/net/colo.h b/net/colo.h
55
index XXXXXXX..XXXXXXX 100644
67
index XXXXXXX..XXXXXXX 100644
56
--- a/net/colo.h
68
--- a/net/colo.h
57
+++ b/net/colo.h
69
+++ b/net/colo.h
58
@@ -XXX,XX +XXX,XX @@ typedef struct Packet {
70
@@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque);
59
int size;
71
Connection *connection_get(GHashTable *connection_track_table,
60
/* Time of packet creation, in wall clock ms */
61
int64_t creation_ms;
62
+ /* Get vnet_hdr_len from filter */
63
+ uint32_t vnet_hdr_len;
64
} Packet;
65
66
typedef struct ConnectionKey {
67
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
68
ConnectionKey *key,
72
ConnectionKey *key,
69
GQueue *conn_list);
73
GQueue *conn_list);
74
+bool connection_has_tracked(GHashTable *connection_track_table,
75
+ ConnectionKey *key);
70
void connection_hashtable_reset(GHashTable *connection_track_table);
76
void connection_hashtable_reset(GHashTable *connection_track_table);
71
-Packet *packet_new(const void *data, int size);
77
Packet *packet_new(const void *data, int size, int vnet_hdr_len);
72
+Packet *packet_new(const void *data, int size, int vnet_hdr_len);
73
void packet_destroy(void *opaque, void *user_data);
78
void packet_destroy(void *opaque, void *user_data);
74
75
#endif /* QEMU_COLO_PROXY_H */
76
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
79
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
77
index XXXXXXX..XXXXXXX 100644
80
index XXXXXXX..XXXXXXX 100644
78
--- a/net/filter-rewriter.c
81
--- a/net/filter-rewriter.c
79
+++ b/net/filter-rewriter.c
82
+++ b/net/filter-rewriter.c
83
@@ -XXX,XX +XXX,XX @@
84
#include "qemu/main-loop.h"
85
#include "qemu/iov.h"
86
#include "net/checksum.h"
87
+#include "net/colo.h"
88
+#include "migration/colo.h"
89
90
#define FILTER_COLO_REWRITER(obj) \
91
OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
92
93
#define TYPE_FILTER_REWRITER "filter-rewriter"
94
+#define FAILOVER_MODE_ON true
95
+#define FAILOVER_MODE_OFF false
96
97
typedef struct RewriterState {
98
NetFilterState parent_obj;
99
@@ -XXX,XX +XXX,XX @@ typedef struct RewriterState {
100
/* hashtable to save connection */
101
GHashTable *connection_track_table;
102
bool vnet_hdr;
103
+ bool failover_mode;
104
} RewriterState;
105
106
+static void filter_rewriter_failover_mode(RewriterState *s)
107
+{
108
+ s->failover_mode = FAILOVER_MODE_ON;
109
+}
110
+
111
static void filter_rewriter_flush(NetFilterState *nf)
112
{
113
RewriterState *s = FILTER_COLO_REWRITER(nf);
80
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
114
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
81
char *buf = g_malloc0(size);
115
*/
82
116
reverse_connection_key(&key);
83
iov_to_buf(iov, iovcnt, 0, buf, size);
117
}
84
- pkt = packet_new(buf, size);
118
+
85
+ pkt = packet_new(buf, size, 0);
119
+ /* After failover we needn't change new TCP packet */
86
g_free(buf);
120
+ if (s->failover_mode &&
87
121
+ !connection_has_tracked(s->connection_track_table, &key)) {
88
/*
122
+ goto out;
123
+ }
124
+
125
conn = connection_get(s->connection_track_table,
126
&key,
127
NULL);
128
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
129
}
130
}
131
132
+out:
133
packet_destroy(pkt, NULL);
134
pkt = NULL;
135
return 0;
136
}
137
138
+static void reset_seq_offset(gpointer key, gpointer value, gpointer user_data)
139
+{
140
+ Connection *conn = (Connection *)value;
141
+
142
+ conn->offset = 0;
143
+}
144
+
145
+static gboolean offset_is_nonzero(gpointer key,
146
+ gpointer value,
147
+ gpointer user_data)
148
+{
149
+ Connection *conn = (Connection *)value;
150
+
151
+ return conn->offset ? true : false;
152
+}
153
+
154
+static void colo_rewriter_handle_event(NetFilterState *nf, int event,
155
+ Error **errp)
156
+{
157
+ RewriterState *rs = FILTER_COLO_REWRITER(nf);
158
+
159
+ switch (event) {
160
+ case COLO_EVENT_CHECKPOINT:
161
+ g_hash_table_foreach(rs->connection_track_table,
162
+ reset_seq_offset, NULL);
163
+ break;
164
+ case COLO_EVENT_FAILOVER:
165
+ if (!g_hash_table_find(rs->connection_track_table,
166
+ offset_is_nonzero, NULL)) {
167
+ filter_rewriter_failover_mode(rs);
168
+ }
169
+ break;
170
+ default:
171
+ break;
172
+ }
173
+}
174
+
175
static void colo_rewriter_cleanup(NetFilterState *nf)
176
{
177
RewriterState *s = FILTER_COLO_REWRITER(nf);
178
@@ -XXX,XX +XXX,XX @@ static void filter_rewriter_init(Object *obj)
179
RewriterState *s = FILTER_COLO_REWRITER(obj);
180
181
s->vnet_hdr = false;
182
+ s->failover_mode = FAILOVER_MODE_OFF;
183
object_property_add_bool(obj, "vnet_hdr_support",
184
filter_rewriter_get_vnet_hdr,
185
filter_rewriter_set_vnet_hdr, NULL);
186
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_class_init(ObjectClass *oc, void *data)
187
nfc->setup = colo_rewriter_setup;
188
nfc->cleanup = colo_rewriter_cleanup;
189
nfc->receive_iov = colo_rewriter_receive_iov;
190
+ nfc->handle_event = colo_rewriter_handle_event;
191
}
192
193
static const TypeInfo colo_rewriter_info = {
89
--
194
--
90
2.7.4
195
2.5.0
91
196
92
197
diff view generated by jsdifflib
1
From: Michal Privoznik <mprivozn@redhat.com>
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
2
2
3
We have a function that checks if given number is power of two.
3
Notify all net filters about the checkpoint and failover event.
4
We should prefer it instead of expanding the check on our own.
5
4
6
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
5
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
6
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
8
---
9
hw/net/virtio-net.c | 2 +-
9
migration/colo.c | 15 +++++++++++++++
10
1 file changed, 1 insertion(+), 1 deletion(-)
10
1 file changed, 15 insertions(+)
11
11
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
12
diff --git a/migration/colo.c b/migration/colo.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
14
--- a/migration/colo.c
15
+++ b/hw/net/virtio-net.c
15
+++ b/migration/colo.c
16
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
16
@@ -XXX,XX +XXX,XX @@
17
*/
17
#include "qapi/qapi-events-migration.h"
18
if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
18
#include "qapi/qmp/qerror.h"
19
n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
19
#include "sysemu/cpus.h"
20
- (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
20
+#include "net/filter.h"
21
+ !is_power_of_2(n->net_conf.rx_queue_size)) {
21
22
error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
22
static bool vmstate_loading;
23
"must be a power of 2 between %d and %d.",
23
static Notifier packets_compare_notifier;
24
n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
24
@@ -XXX,XX +XXX,XX @@ static void secondary_vm_do_failover(void)
25
error_report_err(local_err);
26
}
27
28
+ /* Notify all filters of all NIC to do checkpoint */
29
+ colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
30
+ if (local_err) {
31
+ error_report_err(local_err);
32
+ }
33
+
34
if (!autostart) {
35
error_report("\"-S\" qemu option will be ignored in secondary side");
36
/* recover runstate to normal migration finish state */
37
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
38
goto out;
39
}
40
41
+ /* Notify all filters of all NIC to do checkpoint */
42
+ colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
43
+
44
+ if (local_err) {
45
+ qemu_mutex_unlock_iothread();
46
+ goto out;
47
+ }
48
+
49
vmstate_loading = false;
50
vm_start();
51
trace_colo_vm_state_change("stop", "run");
25
--
52
--
26
2.7.4
53
2.5.0
27
54
28
55
diff view generated by jsdifflib
New patch
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
1
2
3
COLO thread may sleep at qemu_sem_wait(&s->colo_checkpoint_sem),
4
while failover work begins; it's better to wake it up to speed up
5
the process.
6
7
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
8
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
migration/colo.c | 8 ++++++++
12
1 file changed, 8 insertions(+)
13
14
diff --git a/migration/colo.c b/migration/colo.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/migration/colo.c
17
+++ b/migration/colo.c
18
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
19
20
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
21
MIGRATION_STATUS_COMPLETED);
22
+ /*
23
+ * kick COLO thread which might wait at
24
+ * qemu_sem_wait(&s->colo_checkpoint_sem).
25
+ */
26
+ colo_checkpoint_notify(migrate_get_current());
27
28
/*
29
* Wake up COLO thread which may blocked in recv() or send(),
30
@@ -XXX,XX +XXX,XX @@ static void colo_process_checkpoint(MigrationState *s)
31
32
qemu_sem_wait(&s->colo_checkpoint_sem);
33
34
+ if (s->state != MIGRATION_STATUS_COLO) {
35
+ goto out;
36
+ }
37
ret = colo_do_checkpoint_transaction(s, bioc, fb);
38
if (ret < 0) {
39
goto out;
40
--
41
2.5.0
42
43
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <chen.zhang@intel.com>
1
2
3
This diagram helps users better understand COLO.
4
Suggested by Markus Armbruster.
5
6
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
7
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
docs/COLO-FT.txt | 34 ++++++++++++++++++++++++++++++++++
11
1 file changed, 34 insertions(+)
12
13
diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt
14
index XXXXXXX..XXXXXXX 100644
15
--- a/docs/COLO-FT.txt
16
+++ b/docs/COLO-FT.txt
17
@@ -XXX,XX +XXX,XX @@ Note:
18
HeartBeat has not been implemented yet, so you need to trigger failover process
19
by using 'x-colo-lost-heartbeat' command.
20
21
+== COLO operation status ==
22
+
23
++-----------------+
24
+| |
25
+| Start COLO |
26
+| |
27
++--------+--------+
28
+ |
29
+ | Main qmp command:
30
+ | migrate-set-capabilities with x-colo
31
+ | migrate
32
+ |
33
+ v
34
++--------+--------+
35
+| |
36
+| COLO running |
37
+| |
38
++--------+--------+
39
+ |
40
+ | Main qmp command:
41
+ | x-colo-lost-heartbeat
42
+ | or
43
+ | some error happened
44
+ v
45
++--------+--------+
46
+| | send qmp event:
47
+| COLO failover | COLO_EXIT
48
+| |
49
++-----------------+
50
+
51
+COLO use the qmp command to switch and report operation status.
52
+The diagram just shows the main qmp command, you can get the detail
53
+in test procedure.
54
+
55
== Test procedure ==
56
1. Startup qemu
57
Primary:
58
--
59
2.5.0
60
61
diff view generated by jsdifflib
1
Spec said offloads should be le64, so use virtio_ldq_p() to guarantee
1
From: liujunjie <liujunjie23@huawei.com>
2
valid endian.
3
2
4
Fixes: 644c98587d4c ("virtio-net: dynamic network offloads configuration")
3
Before, we did not clear callback like handle_output when delete
5
Cc: qemu-stable@nongnu.org
4
the virtqueue, which may result in a segmentation fault.
6
Cc: Dmitry Fleytman <dfleytma@redhat.com>
5
The scene is as follows:
6
1. Start a vm with multiqueue vhost-net,
7
2. then we write VIRTIO_PCI_GUEST_FEATURES in PCI configuration to
8
trigger multiqueue disable in this vm which will delete the virtqueue.
9
In this step, the tx_bh is deleted but the callback virtio_net_handle_tx_bh
10
still exists.
11
3. Finally, we write VIRTIO_PCI_QUEUE_NOTIFY in PCI configuration to
12
notify the deleted virtqueue. In this way, virtio_net_handle_tx_bh
13
will be called and qemu will crash.
14
15
Although the way described above is uncommon, we had better reinforce it.
16
17
CC: qemu-stable@nongnu.org
18
Signed-off-by: liujunjie <liujunjie23@huawei.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
19
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
20
---
9
hw/net/virtio-net.c | 2 ++
21
hw/virtio/virtio.c | 2 ++
10
1 file changed, 2 insertions(+)
22
1 file changed, 2 insertions(+)
11
23
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
24
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
13
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
26
--- a/hw/virtio/virtio.c
15
+++ b/hw/net/virtio-net.c
27
+++ b/hw/virtio/virtio.c
16
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
28
@@ -XXX,XX +XXX,XX @@ void virtio_del_queue(VirtIODevice *vdev, int n)
17
if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
29
18
uint64_t supported_offloads;
30
vdev->vq[n].vring.num = 0;
19
31
vdev->vq[n].vring.num_default = 0;
20
+ offloads = virtio_ldq_p(vdev, &offloads);
32
+ vdev->vq[n].handle_output = NULL;
21
+
33
+ vdev->vq[n].handle_aio_output = NULL;
22
if (!n->has_vnet_hdr) {
34
}
23
return VIRTIO_NET_ERR;
35
24
}
36
static void virtio_set_isr(VirtIODevice *vdev, int value)
25
--
37
--
26
2.7.4
38
2.5.0
27
39
28
40
diff view generated by jsdifflib
New patch
1
In ne2000_receive(), we try to assign size_ to size which converts
2
from size_t to integer. This will cause trouble when size_ is greater than
3
INT_MAX: it will lead to a negative value in size, which can then pass
4
the check of size < MIN_BUF_SIZE and may lead to out-of-bounds access
5
for both buf and buf1.
1
6
7
Fixing by converting the type of size to size_t.
8
9
CC: qemu-stable@nongnu.org
10
Reported-by: Daniel Shapira <daniel@twistlock.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/ne2000.c | 4 ++--
15
1 file changed, 2 insertions(+), 2 deletions(-)
16
17
diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/ne2000.c
20
+++ b/hw/net/ne2000.c
21
@@ -XXX,XX +XXX,XX @@ static int ne2000_buffer_full(NE2000State *s)
22
ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
23
{
24
NE2000State *s = qemu_get_nic_opaque(nc);
25
- int size = size_;
26
+ size_t size = size_;
27
uint8_t *p;
28
unsigned int total_len, next, avail, len, index, mcast_idx;
29
uint8_t buf1[60];
30
@@ -XXX,XX +XXX,XX @@ ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
31
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
32
33
#if defined(DEBUG_NE2000)
34
- printf("NE2000: received len=%d\n", size);
35
+ printf("NE2000: received len=%zu\n", size);
36
#endif
37
38
if (s->cmd & E8390_STOP || ne2000_buffer_full(s))
39
--
40
2.5.0
41
42
diff view generated by jsdifflib
New patch
1
In rtl8139_do_receive(), we try to assign size_ to size which converts
2
from size_t to integer. This will cause trouble when size_ is greater than
3
INT_MAX: it will lead to a negative value in size, which can then pass
4
the check of size < MIN_BUF_SIZE and may lead to out-of-bounds access
5
for both buf and buf1.
1
6
7
Fixing by converting the type of size to size_t.
8
9
CC: qemu-stable@nongnu.org
10
Reported-by: Daniel Shapira <daniel@twistlock.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/rtl8139.c | 8 ++++----
15
1 file changed, 4 insertions(+), 4 deletions(-)
16
17
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/rtl8139.c
20
+++ b/hw/net/rtl8139.c
21
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
22
RTL8139State *s = qemu_get_nic_opaque(nc);
23
PCIDevice *d = PCI_DEVICE(s);
24
/* size is the length of the buffer passed to the driver */
25
- int size = size_;
26
+ size_t size = size_;
27
const uint8_t *dot1q_buf = NULL;
28
29
uint32_t packet_header = 0;
30
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
31
static const uint8_t broadcast_macaddr[6] =
32
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
33
34
- DPRINTF(">>> received len=%d\n", size);
35
+ DPRINTF(">>> received len=%zu\n", size);
36
37
/* test if board clock is stopped */
38
if (!s->clock_enabled)
39
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
40
41
if (size+4 > rx_space)
42
{
43
- DPRINTF("C+ Rx mode : descriptor %d size %d received %d + 4\n",
44
+ DPRINTF("C+ Rx mode : descriptor %d size %d received %zu + 4\n",
45
descriptor, rx_space, size);
46
47
s->IntrStatus |= RxOverflow;
48
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
49
if (avail != 0 && RX_ALIGN(size + 8) >= avail)
50
{
51
DPRINTF("rx overflow: rx buffer length %d head 0x%04x "
52
- "read 0x%04x === available 0x%04x need 0x%04x\n",
53
+ "read 0x%04x === available 0x%04x need 0x%04zx\n",
54
s->RxBufferSize, s->RxBufAddr, s->RxBufPtr, avail, size + 8);
55
56
s->IntrStatus |= RxOverflow;
57
--
58
2.5.0
59
60
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
In pcnet_receive(), we try to assign size_ to size which converts from
2
size_t to integer. This will cause trouble when size_ is greater than
3
INT_MAX: it will lead to a negative value in size, which can then pass
4
the check of size < MIN_BUF_SIZE and may lead to out-of-bounds access
5
for both buf and buf1.
2
6
3
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Fixing by converting the type of size to size_t.
8
9
CC: qemu-stable@nongnu.org
10
Reported-by: Daniel Shapira <daniel@twistlock.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
13
---
6
docs/colo-proxy.txt | 26 ++++++++++++++++++++++++++
14
hw/net/pcnet.c | 4 ++--
7
1 file changed, 26 insertions(+)
15
1 file changed, 2 insertions(+), 2 deletions(-)
8
16
9
diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt
17
diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c
10
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
11
--- a/docs/colo-proxy.txt
19
--- a/hw/net/pcnet.c
12
+++ b/docs/colo-proxy.txt
20
+++ b/hw/net/pcnet.c
13
@@ -XXX,XX +XXX,XX @@ Secondary(ip:3.3.3.8):
21
@@ -XXX,XX +XXX,XX @@ ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
14
-chardev socket,id=red1,host=3.3.3.3,port=9004
22
uint8_t buf1[60];
15
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
23
int remaining;
16
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
24
int crc_err = 0;
17
+-object filter-rewriter,id=f3,netdev=hn0,queue=all
25
- int size = size_;
18
+
26
+ size_t size = size_;
19
+If you want to use virtio-net-pci or other driver with vnet_header:
27
20
+
28
if (CSR_DRX(s) || CSR_STOP(s) || CSR_SPND(s) || !size ||
21
+Primary(ip:3.3.3.3):
29
(CSR_LOOP(s) && !s->looptest)) {
22
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
30
return -1;
23
+-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
31
}
24
+-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
32
#ifdef PCNET_DEBUG
25
+-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
33
- printf("pcnet_receive size=%d\n", size);
26
+-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
34
+ printf("pcnet_receive size=%zu\n", size);
27
+-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
35
#endif
28
+-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
36
29
+-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
37
/* if too small buffer, then expand it */
30
+-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
31
+-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out,vnet_hdr_support
32
+-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0,vnet_hdr_support
33
+-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
34
+
35
+Secondary(ip:3.3.3.8):
36
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
37
+-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
38
+-chardev socket,id=red0,host=3.3.3.3,port=9003
39
+-chardev socket,id=red1,host=3.3.3.3,port=9004
40
+-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0,vnet_hdr_support
41
+-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1,vnet_hdr_support
42
+-object filter-rewriter,id=f3,netdev=hn0,queue=all,vnet_hdr_support
43
44
Note:
45
a.COLO-proxy must work with COLO-frame and Block-replication.
46
--
38
--
47
2.7.4
39
2.5.0
48
40
49
41
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
There should not be a reason for passing a packet size greater than
2
INT_MAX. It's usually a hint of a bug somewhere, so ignore packet sizes
3
greater than INT_MAX in qemu_deliver_packet_iov()
2
4
3
Add vnet_hdr_len arguments in NetClientState
5
CC: qemu-stable@nongnu.org
4
so that other modules can get the real vnet_hdr_len easily.
6
Reported-by: Daniel Shapira <daniel@twistlock.com>
5
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
---
9
include/net/net.h | 1 +
10
net/net.c | 7 ++++++-
10
net/net.c | 1 +
11
1 file changed, 6 insertions(+), 1 deletion(-)
11
2 files changed, 2 insertions(+)
12
12
13
diff --git a/include/net/net.h b/include/net/net.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/net/net.h
16
+++ b/include/net/net.h
17
@@ -XXX,XX +XXX,XX @@ struct NetClientState {
18
unsigned int queue_index;
19
unsigned rxfilter_notify_enabled:1;
20
int vring_enable;
21
+ int vnet_hdr_len;
22
QTAILQ_HEAD(NetFilterHead, NetFilterState) filters;
23
};
24
25
diff --git a/net/net.c b/net/net.c
13
diff --git a/net/net.c b/net/net.c
26
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
27
--- a/net/net.c
15
--- a/net/net.c
28
+++ b/net/net.c
16
+++ b/net/net.c
29
@@ -XXX,XX +XXX,XX @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
17
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
30
return;
18
void *opaque)
19
{
20
NetClientState *nc = opaque;
21
+ size_t size = iov_size(iov, iovcnt);
22
int ret;
23
24
+ if (size > INT_MAX) {
25
+ return size;
26
+ }
27
+
28
if (nc->link_down) {
29
- return iov_size(iov, iovcnt);
30
+ return size;
31
}
31
}
32
32
33
+ nc->vnet_hdr_len = len;
33
if (nc->receive_disabled) {
34
nc->info->set_vnet_hdr_len(nc, len);
35
}
36
37
--
34
--
38
2.7.4
35
2.5.0
39
36
40
37
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Martin Wilck <mwilck@suse.com>
2
2
3
We add the vnet_hdr_support option for filter-rewriter, default is disabled.
3
The e1000 emulation silently discards RX packets if there's
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
insufficient space in the ring buffer. This leads to errors
5
You can use it for example:
5
on higher-level protocols in the guest, with no indication
6
-object filter-rewriter,id=rew0,netdev=hn0,queue=all,vnet_hdr_support
6
about the error cause.
7
7
8
We get the vnet_hdr_len from NetClientState, which lets us
8
This patch increments the "Missed Packets Count" (MPC) and
9
parse net packet correctly.
9
"Receive No Buffers Count" (RNBC) HW counters in this case.
10
As the emulation has no FIFO for buffering packets that can't
11
immediately be pushed to the guest, these two registers are
12
practically equivalent (see 10.2.7.4, 10.2.7.33 in
13
https://www.intel.com/content/www/us/en/embedded/products/networking/82574l-gbe-controller-datasheet.html).
10
14
11
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
15
On a Linux guest, the register content will be reflected in
16
the "rx_missed_errors" and "rx_no_buffer_count" stats from
17
"ethtool -S", and in the "missed" stat from "ip -s -s link show",
18
giving at least some hint about the error cause inside the guest.
19
20
If the cause is known, problems like this can often be avoided
21
easily, by increasing the number of RX descriptors in the guest
22
e1000 driver (e.g. under Linux, "e1000.RxDescriptors=1024").
23
24
The patch also adds a qemu trace message for this condition.
25
26
Signed-off-by: Martin Wilck <mwilck@suse.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
27
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
28
---
14
net/filter-rewriter.c | 37 ++++++++++++++++++++++++++++++++++++-
29
hw/net/e1000.c | 16 +++++++++++++---
15
qemu-options.hx | 4 ++--
30
hw/net/trace-events | 3 +++
16
2 files changed, 38 insertions(+), 3 deletions(-)
31
2 files changed, 16 insertions(+), 3 deletions(-)
17
32
18
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
33
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
19
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
20
--- a/net/filter-rewriter.c
35
--- a/hw/net/e1000.c
21
+++ b/net/filter-rewriter.c
36
+++ b/hw/net/e1000.c
22
@@ -XXX,XX +XXX,XX @@
37
@@ -XXX,XX +XXX,XX @@
23
#include "qemu-common.h"
38
#include "qemu/range.h"
24
#include "qapi/error.h"
39
25
#include "qapi/qmp/qerror.h"
40
#include "e1000x_common.h"
26
+#include "qemu/error-report.h"
41
+#include "trace.h"
27
#include "qapi-visit.h"
42
28
#include "qom/object.h"
43
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
29
#include "qemu/main-loop.h"
44
30
@@ -XXX,XX +XXX,XX @@ typedef struct RewriterState {
45
@@ -XXX,XX +XXX,XX @@ static uint64_t rx_desc_base(E1000State *s)
31
NetQueue *incoming_queue;
46
return (bah << 32) + bal;
32
/* hashtable to save connection */
33
GHashTable *connection_track_table;
34
+ bool vnet_hdr;
35
} RewriterState;
36
37
static void filter_rewriter_flush(NetFilterState *nf)
38
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
39
ConnectionKey key;
40
Packet *pkt;
41
ssize_t size = iov_size(iov, iovcnt);
42
+ ssize_t vnet_hdr_len = 0;
43
char *buf = g_malloc0(size);
44
45
iov_to_buf(iov, iovcnt, 0, buf, size);
46
- pkt = packet_new(buf, size, 0);
47
+
48
+ if (s->vnet_hdr) {
49
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
50
+ }
51
+
52
+ pkt = packet_new(buf, size, vnet_hdr_len);
53
g_free(buf);
54
55
/*
56
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
57
s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
58
}
47
}
59
48
60
+static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp)
49
+static void
50
+e1000_receiver_overrun(E1000State *s, size_t size)
61
+{
51
+{
62
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
52
+ trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
63
+
53
+ e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
64
+ return s->vnet_hdr;
54
+ e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
55
+ set_ics(s, 0, E1000_ICS_RXO);
65
+}
56
+}
66
+
57
+
67
+static void filter_rewriter_set_vnet_hdr(Object *obj,
58
static ssize_t
68
+ bool value,
59
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
69
+ Error **errp)
60
{
70
+{
61
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
71
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
62
desc_offset = 0;
63
total_size = size + e1000x_fcs_len(s->mac_reg);
64
if (!e1000_has_rxbufs(s, total_size)) {
65
- set_ics(s, 0, E1000_ICS_RXO);
66
- return -1;
67
+ e1000_receiver_overrun(s, total_size);
68
+ return -1;
69
}
70
do {
71
desc_size = total_size - desc_offset;
72
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
73
rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
74
DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
75
rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
76
- set_ics(s, 0, E1000_ICS_RXO);
77
+ e1000_receiver_overrun(s, total_size);
78
return -1;
79
}
80
} while (desc_offset < total_size);
81
diff --git a/hw/net/trace-events b/hw/net/trace-events
82
index XXXXXXX..XXXXXXX 100644
83
--- a/hw/net/trace-events
84
+++ b/hw/net/trace-events
85
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash"
86
net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X"
87
net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes"
88
89
+# hw/net/e1000.c
90
+e1000_receiver_overrun(size_t s, uint32_t rdh, uint32_t rdt) "Receiver overrun: dropped packet of %lu bytes, RDH=%u, RDT=%u"
72
+
91
+
73
+ s->vnet_hdr = value;
92
# hw/net/e1000x_common.c
74
+}
93
e1000x_rx_can_recv_disabled(bool link_up, bool rx_enabled, bool pci_master) "link_up: %d, rx_enabled %d, pci_master %d"
75
+
94
e1000x_vlan_is_vlan_pkt(bool is_vlan_pkt, uint16_t eth_proto, uint16_t vet) "Is VLAN packet: %d, ETH proto: 0x%X, VET: 0x%X"
76
+static void filter_rewriter_init(Object *obj)
77
+{
78
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
79
+
80
+ s->vnet_hdr = false;
81
+ object_property_add_bool(obj, "vnet_hdr_support",
82
+ filter_rewriter_get_vnet_hdr,
83
+ filter_rewriter_set_vnet_hdr, NULL);
84
+}
85
+
86
static void colo_rewriter_class_init(ObjectClass *oc, void *data)
87
{
88
NetFilterClass *nfc = NETFILTER_CLASS(oc);
89
@@ -XXX,XX +XXX,XX @@ static const TypeInfo colo_rewriter_info = {
90
.name = TYPE_FILTER_REWRITER,
91
.parent = TYPE_NETFILTER,
92
.class_init = colo_rewriter_class_init,
93
+ .instance_init = filter_rewriter_init,
94
.instance_size = sizeof(RewriterState),
95
};
96
97
diff --git a/qemu-options.hx b/qemu-options.hx
98
index XXXXXXX..XXXXXXX 100644
99
--- a/qemu-options.hx
100
+++ b/qemu-options.hx
101
@@ -XXX,XX +XXX,XX @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
102
be the same. we can just use indev or outdev, but at least one of indev or outdev
103
need to be specified.
104
105
-@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid}[,queue=@var{all|rx|tx}]
106
+@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support]
107
108
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
109
secondary from primary to keep secondary tcp connection,and rewrite
110
tcp packet to primary from secondary make tcp packet can be handled by
111
-client.
112
+client. If it has the vnet_hdr_support flag, we can parse packet with vnet header.
113
114
usage:
115
colo secondary:
116
--
95
--
117
2.7.4
96
2.5.0
118
97
119
98
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
We add the vnet_hdr_support option for filter-redirector, default is disabled.
3
When using the "-device" option, the property is called "mac".
4
If you use the virtio-net-pci net driver or another driver that needs vnet_hdr, please enable it.
4
"macaddr" is only used for the legacy "-net nic" option.
5
Because colo-compare or other modules needs the vnet_hdr_len to parse
6
packet, we add this new option to send the len to others.
7
You can use it for example:
8
-object filter-redirector,id=r0,netdev=hn0,queue=tx,outdev=red0,vnet_hdr_support
9
5
10
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Reported-by: Harald Hoyer <harald@redhat.com>
7
Reviewed-by: Markus Armbruster <armbru@redhat.com>
8
Signed-off-by: Thomas Huth <thuth@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
10
---
13
net/filter-mirror.c | 23 +++++++++++++++++++++++
11
qemu-options.hx | 2 +-
14
qemu-options.hx | 6 +++---
12
1 file changed, 1 insertion(+), 1 deletion(-)
15
2 files changed, 26 insertions(+), 3 deletions(-)
16
13
17
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/net/filter-mirror.c
20
+++ b/net/filter-mirror.c
21
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
22
s->outdev = g_strdup(value);
23
}
24
25
+static bool filter_redirector_get_vnet_hdr(Object *obj, Error **errp)
26
+{
27
+ MirrorState *s = FILTER_REDIRECTOR(obj);
28
+
29
+ return s->vnet_hdr;
30
+}
31
+
32
+static void filter_redirector_set_vnet_hdr(Object *obj,
33
+ bool value,
34
+ Error **errp)
35
+{
36
+ MirrorState *s = FILTER_REDIRECTOR(obj);
37
+
38
+ s->vnet_hdr = value;
39
+}
40
+
41
static void filter_mirror_init(Object *obj)
42
{
43
MirrorState *s = FILTER_MIRROR(obj);
44
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_init(Object *obj)
45
46
static void filter_redirector_init(Object *obj)
47
{
48
+ MirrorState *s = FILTER_REDIRECTOR(obj);
49
+
50
object_property_add_str(obj, "indev", filter_redirector_get_indev,
51
filter_redirector_set_indev, NULL);
52
object_property_add_str(obj, "outdev", filter_redirector_get_outdev,
53
filter_redirector_set_outdev, NULL);
54
+
55
+ s->vnet_hdr = false;
56
+ object_property_add_bool(obj, "vnet_hdr_support",
57
+ filter_redirector_get_vnet_hdr,
58
+ filter_redirector_set_vnet_hdr, NULL);
59
}
60
61
static void filter_mirror_fini(Object *obj)
62
diff --git a/qemu-options.hx b/qemu-options.hx
14
diff --git a/qemu-options.hx b/qemu-options.hx
63
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
64
--- a/qemu-options.hx
16
--- a/qemu-options.hx
65
+++ b/qemu-options.hx
17
+++ b/qemu-options.hx
66
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
18
@@ -XXX,XX +XXX,XX @@ qemu-system-i386 linux.img \
67
19
-netdev socket,id=n2,mcast=230.0.0.1:1234
68
filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
20
# launch yet another QEMU instance on same "bus"
69
21
qemu-system-i386 linux.img \
70
-@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
22
- -device e1000,netdev=n3,macaddr=52:54:00:12:34:58 \
71
-outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
23
+ -device e1000,netdev=n3,mac=52:54:00:12:34:58 \
72
+@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
24
-netdev socket,id=n3,mcast=230.0.0.1:1234
73
25
@end example
74
filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev
26
75
-@var{chardevid},and redirect indev's packet to filter.
76
+@var{chardevid},and redirect indev's packet to filter. If it has the vnet_hdr_support flag,
77
+filter-redirector will redirect packet with vnet_hdr_len.
78
Create a filter-redirector we need to differ outdev id from indev id, id can not
79
be the same. we can just use indev or outdev, but at least one of indev or outdev
80
need to be specified.
81
--
27
--
82
2.7.4
28
2.5.0
83
29
84
30
diff view generated by jsdifflib