1
The following changes since commit c5e4e49258e9b89cb34c085a419dd9f862935c48:
1
The following changes since commit 92f8c6fef13b31ba222c4d20ad8afd2b79c4c28e:
2
2
3
Merge remote-tracking branch 'remotes/xanclic/tags/pull-block-2018-09-25' into staging (2018-09-25 16:47:35 +0100)
3
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20210525' into staging (2021-05-25 16:17:06 +0100)
4
4
5
are available in the Git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to f3df030edf90db184cd029697e976e24f1925e03:
9
for you to fetch changes up to 90322e646e87c1440661cb3ddbc0cc94309d8a4f:
10
10
11
e1000: indicate dropped packets in HW counters (2018-09-26 11:06:10 +0800)
11
MAINTAINERS: Added eBPF maintainers information. (2021-06-04 15:25:46 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
----------------------------------------------------------------
15
----------------------------------------------------------------
16
Jason Wang (4):
16
Andrew Melnychenko (7):
17
ne2000: fix possible out of bound access in ne2000_receive
17
net/tap: Added TUNSETSTEERINGEBPF code.
18
rtl8139: fix possible out of bound access
18
net: Added SetSteeringEBPF method for NetClientState.
19
pcnet: fix possible buffer overflow
19
ebpf: Added eBPF RSS program.
20
net: ignore packet size greater than INT_MAX
20
ebpf: Added eBPF RSS loader.
21
virtio-net: Added eBPF RSS to virtio-net.
22
docs: Added eBPF documentation.
23
MAINTAINERS: Added eBPF maintainers information.
21
24
22
Martin Wilck (1):
25
MAINTAINERS | 8 +
23
e1000: indicate dropped packets in HW counters
26
configure | 8 +-
24
27
docs/devel/ebpf_rss.rst | 125 +++++++++
25
Zhang Chen (15):
28
docs/devel/index.rst | 1 +
26
filter-rewriter: Add TCP state machine and fix memory leak in connection_track_table
29
ebpf/ebpf_rss-stub.c | 40 +++
27
colo-compare: implement the process of checkpoint
30
ebpf/ebpf_rss.c | 165 ++++++++++++
28
colo-compare: use notifier to notify packets comparing result
31
ebpf/ebpf_rss.h | 44 ++++
29
COLO: integrate colo compare with colo frame
32
ebpf/meson.build | 1 +
30
COLO: Add block replication into colo process
33
ebpf/rss.bpf.skeleton.h | 431 +++++++++++++++++++++++++++++++
31
COLO: Remove colo_state migration struct
34
ebpf/trace-events | 4 +
32
COLO: Load dirty pages into SVM's RAM cache firstly
35
ebpf/trace.h | 1 +
33
ram/COLO: Record the dirty pages that SVM received
36
hw/net/vhost_net.c | 3 +
34
COLO: Flush memory data from ram cache
37
hw/net/virtio-net.c | 116 ++++++++-
35
qapi/migration.json: Rename COLO unknown mode to none mode.
38
include/hw/virtio/virtio-net.h | 4 +
36
qapi: Add new command to query colo status
39
include/net/net.h | 2 +
37
savevm: split the process of different stages for loadvm/savevm
40
meson.build | 23 ++
38
filter: Add handle_event method for NetFilterClass
41
meson_options.txt | 2 +
39
filter-rewriter: handle checkpoint and failover event
42
net/tap-bsd.c | 5 +
40
docs: Add COLO status diagram to COLO-FT.txt
43
net/tap-linux.c | 13 +
41
44
net/tap-linux.h | 1 +
42
liujunjie (1):
45
net/tap-solaris.c | 5 +
43
clean up callback when del virtqueue
46
net/tap-stub.c | 5 +
44
47
net/tap.c | 9 +
45
zhanghailiang (4):
48
net/tap_int.h | 1 +
46
qmp event: Add COLO_EXIT event to notify users while exited COLO
49
net/vhost-vdpa.c | 2 +
47
COLO: flush host dirty ram from cache
50
tools/ebpf/Makefile.ebpf | 21 ++
48
COLO: notify net filters about checkpoint/failover event
51
tools/ebpf/rss.bpf.c | 571 +++++++++++++++++++++++++++++++++++++++++
49
COLO: quick failover process by kick COLO thread
52
27 files changed, 1607 insertions(+), 4 deletions(-)
50
53
create mode 100644 docs/devel/ebpf_rss.rst
51
docs/COLO-FT.txt | 34 ++++++++
54
create mode 100644 ebpf/ebpf_rss-stub.c
52
hw/net/e1000.c | 16 +++-
55
create mode 100644 ebpf/ebpf_rss.c
53
hw/net/ne2000.c | 4 +-
56
create mode 100644 ebpf/ebpf_rss.h
54
hw/net/pcnet.c | 4 +-
57
create mode 100644 ebpf/meson.build
55
hw/net/rtl8139.c | 8 +-
58
create mode 100644 ebpf/rss.bpf.skeleton.h
56
hw/net/trace-events | 3 +
59
create mode 100644 ebpf/trace-events
57
hw/virtio/virtio.c | 2 +
60
create mode 100644 ebpf/trace.h
58
include/exec/ram_addr.h | 1 +
61
create mode 100755 tools/ebpf/Makefile.ebpf
59
include/migration/colo.h | 11 ++-
62
create mode 100644 tools/ebpf/rss.bpf.c
60
include/net/filter.h | 5 ++
61
migration/Makefile.objs | 2 +-
62
migration/colo-comm.c | 76 -----------------
63
migration/colo-failover.c | 2 +-
64
migration/colo.c | 212 +++++++++++++++++++++++++++++++++++++++++++---
65
migration/migration.c | 46 ++++++++--
66
migration/ram.c | 166 +++++++++++++++++++++++++++++++++++-
67
migration/ram.h | 4 +
68
migration/savevm.c | 53 ++++++++++--
69
migration/savevm.h | 5 ++
70
migration/trace-events | 3 +
71
net/colo-compare.c | 115 ++++++++++++++++++++++---
72
net/colo-compare.h | 24 ++++++
73
net/colo.c | 10 ++-
74
net/colo.h | 11 +--
75
net/filter-rewriter.c | 166 +++++++++++++++++++++++++++++++++---
76
net/filter.c | 17 ++++
77
net/net.c | 26 +++++-
78
qapi/migration.json | 80 +++++++++++++++--
79
vl.c | 2 -
80
29 files changed, 957 insertions(+), 151 deletions(-)
81
delete mode 100644 migration/colo-comm.c
82
create mode 100644 net/colo-compare.h
83
63
84
64
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
We add an almost full TCP state machine in filter-rewriter, except
4
TCPS_LISTEN, with some simplifications in the VM active close FIN states.
5
The reason for this simplification is that the guest kernel will track
6
the TCP status and wait 2MSL time too; if the client resends the FIN packet,
7
the guest will resend the last ACK, so we needn't wait 2MSL time in filter-rewriter.
8
9
After a net connection is closed, we didn't clear its related resources
10
in connection_track_table, which will lead to a memory leak.
11
12
Let's track the state of each net connection; if it is closed, its related
13
resources will be cleared up.
14
15
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
16
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
17
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
18
Signed-off-by: Jason Wang <jasowang@redhat.com>
19
---
20
net/colo.c | 2 +-
21
net/colo.h | 9 ++--
22
net/filter-rewriter.c | 109 ++++++++++++++++++++++++++++++++++++++----
23
3 files changed, 104 insertions(+), 16 deletions(-)
24
25
diff --git a/net/colo.c b/net/colo.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/net/colo.c
28
+++ b/net/colo.c
29
@@ -XXX,XX +XXX,XX @@ Connection *connection_new(ConnectionKey *key)
30
conn->ip_proto = key->ip_proto;
31
conn->processing = false;
32
conn->offset = 0;
33
- conn->syn_flag = 0;
34
+ conn->tcp_state = TCPS_CLOSED;
35
conn->pack = 0;
36
conn->sack = 0;
37
g_queue_init(&conn->primary_list);
38
diff --git a/net/colo.h b/net/colo.h
39
index XXXXXXX..XXXXXXX 100644
40
--- a/net/colo.h
41
+++ b/net/colo.h
42
@@ -XXX,XX +XXX,XX @@
43
#include "slirp/slirp.h"
44
#include "qemu/jhash.h"
45
#include "qemu/timer.h"
46
+#include "slirp/tcp.h"
47
48
#define HASHTABLE_MAX_SIZE 16384
49
50
@@ -XXX,XX +XXX,XX @@ typedef struct Connection {
51
uint32_t sack;
52
/* offset = secondary_seq - primary_seq */
53
tcp_seq offset;
54
- /*
55
- * we use this flag update offset func
56
- * run once in independent tcp connection
57
- */
58
- int syn_flag;
59
+
60
+ int tcp_state; /* TCP FSM state */
61
+ tcp_seq fin_ack_seq; /* the seq of 'fin=1,ack=1' */
62
} Connection;
63
64
uint32_t connection_key_hash(const void *opaque);
65
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/net/filter-rewriter.c
68
+++ b/net/filter-rewriter.c
69
@@ -XXX,XX +XXX,XX @@ static int is_tcp_packet(Packet *pkt)
70
}
71
72
/* handle tcp packet from primary guest */
73
-static int handle_primary_tcp_pkt(NetFilterState *nf,
74
+static int handle_primary_tcp_pkt(RewriterState *rf,
75
Connection *conn,
76
- Packet *pkt)
77
+ Packet *pkt, ConnectionKey *key)
78
{
79
struct tcphdr *tcp_pkt;
80
81
@@ -XXX,XX +XXX,XX @@ static int handle_primary_tcp_pkt(NetFilterState *nf,
82
trace_colo_filter_rewriter_conn_offset(conn->offset);
83
}
84
85
+ if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) &&
86
+ conn->tcp_state == TCPS_SYN_SENT) {
87
+ conn->tcp_state = TCPS_ESTABLISHED;
88
+ }
89
+
90
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
91
/*
92
* we use this flag update offset func
93
* run once in independent tcp connection
94
*/
95
- conn->syn_flag = 1;
96
+ conn->tcp_state = TCPS_SYN_RECEIVED;
97
}
98
99
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
100
- if (conn->syn_flag) {
101
+ if (conn->tcp_state == TCPS_SYN_RECEIVED) {
102
/*
103
* offset = secondary_seq - primary seq
104
* ack packet sent by guest from primary node,
105
* so we use th_ack - 1 get primary_seq
106
*/
107
conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
108
- conn->syn_flag = 0;
109
+ conn->tcp_state = TCPS_ESTABLISHED;
110
}
111
if (conn->offset) {
112
/* handle packets to the secondary from the primary */
113
@@ -XXX,XX +XXX,XX @@ static int handle_primary_tcp_pkt(NetFilterState *nf,
114
net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
115
pkt->size - pkt->vnet_hdr_len);
116
}
117
+
118
+ /*
119
+ * Passive close step 3
120
+ */
121
+ if ((conn->tcp_state == TCPS_LAST_ACK) &&
122
+ (ntohl(tcp_pkt->th_ack) == (conn->fin_ack_seq + 1))) {
123
+ conn->tcp_state = TCPS_CLOSED;
124
+ g_hash_table_remove(rf->connection_track_table, key);
125
+ }
126
+ }
127
+
128
+ if ((tcp_pkt->th_flags & TH_FIN) == TH_FIN) {
129
+ /*
130
+ * Passive close.
131
+ * Step 1:
132
+ * The *server* side of this connect is VM, *client* tries to close
133
+ * the connection. We will into CLOSE_WAIT status.
134
+ *
135
+ * Step 2:
136
+ * In this step we will into LAST_ACK status.
137
+ *
138
+ * We got 'fin=1, ack=1' packet from server side, we need to
139
+ * record the seq of 'fin=1, ack=1' packet.
140
+ *
141
+ * Step 3:
142
+ * We got 'ack=1' packets from client side, it acks 'fin=1, ack=1'
143
+ * packet from server side. From this point, we can ensure that there
144
+ * will be no packets in the connection, except that, some errors
145
+ * happen between the path of 'filter object' and vNIC, if this rare
146
+ * case really happen, we can still create a new connection,
147
+ * So it is safe to remove the connection from connection_track_table.
148
+ *
149
+ */
150
+ if (conn->tcp_state == TCPS_ESTABLISHED) {
151
+ conn->tcp_state = TCPS_CLOSE_WAIT;
152
+ }
153
+
154
+ /*
155
+ * Active close step 2.
156
+ */
157
+ if (conn->tcp_state == TCPS_FIN_WAIT_1) {
158
+ conn->tcp_state = TCPS_TIME_WAIT;
159
+ /*
160
+ * For simplify implementation, we needn't wait 2MSL time
161
+ * in filter rewriter. Because guest kernel will track the
162
+ * TCP status and wait 2MSL time, if client resend the FIN
163
+ * packet, guest will apply the last ACK too.
164
+ */
165
+ conn->tcp_state = TCPS_CLOSED;
166
+ g_hash_table_remove(rf->connection_track_table, key);
167
+ }
168
}
169
170
return 0;
171
}
172
173
/* handle tcp packet from secondary guest */
174
-static int handle_secondary_tcp_pkt(NetFilterState *nf,
175
+static int handle_secondary_tcp_pkt(RewriterState *rf,
176
Connection *conn,
177
- Packet *pkt)
178
+ Packet *pkt, ConnectionKey *key)
179
{
180
struct tcphdr *tcp_pkt;
181
182
@@ -XXX,XX +XXX,XX @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
183
trace_colo_filter_rewriter_conn_offset(conn->offset);
184
}
185
186
- if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
187
+ if (conn->tcp_state == TCPS_SYN_RECEIVED &&
188
+ ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
189
/*
190
* save offset = secondary_seq and then
191
* in handle_primary_tcp_pkt make offset
192
@@ -XXX,XX +XXX,XX @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
193
conn->offset = ntohl(tcp_pkt->th_seq);
194
}
195
196
+ /* VM active connect */
197
+ if (conn->tcp_state == TCPS_CLOSED &&
198
+ ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
199
+ conn->tcp_state = TCPS_SYN_SENT;
200
+ }
201
+
202
if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
203
/* Only need to adjust seq while offset is Non-zero */
204
if (conn->offset) {
205
@@ -XXX,XX +XXX,XX @@ static int handle_secondary_tcp_pkt(NetFilterState *nf,
206
}
207
}
208
209
+ /*
210
+ * Passive close step 2:
211
+ */
212
+ if (conn->tcp_state == TCPS_CLOSE_WAIT &&
213
+ (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == (TH_ACK | TH_FIN)) {
214
+ conn->fin_ack_seq = ntohl(tcp_pkt->th_seq);
215
+ conn->tcp_state = TCPS_LAST_ACK;
216
+ }
217
+
218
+ /*
219
+ * Active close
220
+ *
221
+ * Step 1:
222
+ * The *server* side of this connect is VM, *server* tries to close
223
+ * the connection.
224
+ *
225
+ * Step 2:
226
+ * We will into CLOSE_WAIT status.
227
+ * We simplify the TCPS_FIN_WAIT_2, TCPS_TIME_WAIT and
228
+ * CLOSING status.
229
+ */
230
+ if (conn->tcp_state == TCPS_ESTABLISHED &&
231
+ (tcp_pkt->th_flags & (TH_ACK | TH_FIN)) == TH_FIN) {
232
+ conn->tcp_state = TCPS_FIN_WAIT_1;
233
+ }
234
+
235
return 0;
236
}
237
238
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
239
240
if (sender == nf->netdev) {
241
/* NET_FILTER_DIRECTION_TX */
242
- if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
243
+ if (!handle_primary_tcp_pkt(s, conn, pkt, &key)) {
244
qemu_net_queue_send(s->incoming_queue, sender, 0,
245
(const uint8_t *)pkt->data, pkt->size, NULL);
246
packet_destroy(pkt, NULL);
247
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
248
}
249
} else {
250
/* NET_FILTER_DIRECTION_RX */
251
- if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
252
+ if (!handle_secondary_tcp_pkt(s, conn, pkt, &key)) {
253
qemu_net_queue_send(s->incoming_queue, sender, 0,
254
(const uint8_t *)pkt->data, pkt->size, NULL);
255
packet_destroy(pkt, NULL);
256
--
257
2.17.1
258
259
diff view generated by jsdifflib
1
In pcnet_receive(), we try to assign size_ to size which converts from
1
From: Andrew Melnychenko <andrew@daynix.com>
2
size_t to integer. This will cause trouble when size_ is greater than
3
INT_MAX, this will lead to a negative value in size and it can then pass
4
the check of size < MIN_BUF_SIZE which may lead to out-of-bounds access
5
for both buf and buf1.
6
2
7
Fixing by converting the type of size to size_t.
3
Additional code that will be used for eBPF setting steering routine.
8
4
9
CC: qemu-stable@nongnu.org
5
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
10
Reported-by: Daniel Shapira <daniel@twistlock.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
7
---
14
hw/net/pcnet.c | 4 ++--
8
net/tap-linux.h | 1 +
15
1 file changed, 2 insertions(+), 2 deletions(-)
9
1 file changed, 1 insertion(+)
16
10
17
diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c
11
diff --git a/net/tap-linux.h b/net/tap-linux.h
18
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/pcnet.c
13
--- a/net/tap-linux.h
20
+++ b/hw/net/pcnet.c
14
+++ b/net/tap-linux.h
21
@@ -XXX,XX +XXX,XX @@ ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
15
@@ -XXX,XX +XXX,XX @@
22
uint8_t buf1[60];
16
#define TUNSETQUEUE _IOW('T', 217, int)
23
int remaining;
17
#define TUNSETVNETLE _IOW('T', 220, int)
24
int crc_err = 0;
18
#define TUNSETVNETBE _IOW('T', 222, int)
25
- int size = size_;
19
+#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
26
+ size_t size = size_;
20
27
28
if (CSR_DRX(s) || CSR_STOP(s) || CSR_SPND(s) || !size ||
29
(CSR_LOOP(s) && !s->looptest)) {
30
return -1;
31
}
32
#ifdef PCNET_DEBUG
33
- printf("pcnet_receive size=%d\n", size);
34
+ printf("pcnet_receive size=%zu\n", size);
35
#endif
21
#endif
36
22
37
/* if too small buffer, then expand it */
38
--
23
--
39
2.17.1
24
2.7.4
40
25
41
26
diff view generated by jsdifflib
1
From: Zhang Chen <zhangckid@gmail.com>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
For COLO FT, both the PVM and SVM run at the same time,
3
For now, that method is supported only by Linux TAP.
4
and only sync the state when needed.
4
Linux TAP uses TUNSETSTEERINGEBPF ioctl.
5
5
6
So here, let the SVM run while not doing a checkpoint, and change
6
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
7
DEFAULT_MIGRATE_X_CHECKPOINT_DELAY to 200*100.
8
9
Besides, we forgot to release colo_checkpoint_sem and
10
colo_delay_timer, fix them here.
11
12
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
13
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
14
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
15
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
8
---
18
migration/colo.c | 42 ++++++++++++++++++++++++++++++++++++++++--
9
include/net/net.h | 2 ++
19
migration/migration.c | 6 ++----
10
net/tap-bsd.c | 5 +++++
20
2 files changed, 42 insertions(+), 6 deletions(-)
11
net/tap-linux.c | 13 +++++++++++++
12
net/tap-solaris.c | 5 +++++
13
net/tap-stub.c | 5 +++++
14
net/tap.c | 9 +++++++++
15
net/tap_int.h | 1 +
16
7 files changed, 40 insertions(+)
21
17
22
diff --git a/migration/colo.c b/migration/colo.c
18
diff --git a/include/net/net.h b/include/net/net.h
23
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
24
--- a/migration/colo.c
20
--- a/include/net/net.h
25
+++ b/migration/colo.c
21
+++ b/include/net/net.h
26
@@ -XXX,XX +XXX,XX @@
22
@@ -XXX,XX +XXX,XX @@ typedef int (SetVnetBE)(NetClientState *, bool);
27
#include "qemu/error-report.h"
23
typedef struct SocketReadState SocketReadState;
28
#include "migration/failover.h"
24
typedef void (SocketReadStateFinalize)(SocketReadState *rs);
29
#include "replication.h"
25
typedef void (NetAnnounce)(NetClientState *);
30
+#include "net/colo-compare.h"
26
+typedef bool (SetSteeringEBPF)(NetClientState *, int);
31
+#include "net/colo.h"
27
32
28
typedef struct NetClientInfo {
33
static bool vmstate_loading;
29
NetClientDriver type;
34
+static Notifier packets_compare_notifier;
30
@@ -XXX,XX +XXX,XX @@ typedef struct NetClientInfo {
35
31
SetVnetLE *set_vnet_le;
36
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
32
SetVnetBE *set_vnet_be;
37
33
NetAnnounce *announce;
38
@@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s,
34
+ SetSteeringEBPF *set_steering_ebpf;
39
goto out;
35
} NetClientInfo;
40
}
36
41
37
struct NetClientState {
42
+ colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
38
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
43
+ if (local_err) {
39
index XXXXXXX..XXXXXXX 100644
44
+ goto out;
40
--- a/net/tap-bsd.c
41
+++ b/net/tap-bsd.c
42
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
43
{
44
return -1;
45
}
46
+
47
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
48
+{
49
+ return -1;
50
+}
51
diff --git a/net/tap-linux.c b/net/tap-linux.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/net/tap-linux.c
54
+++ b/net/tap-linux.c
55
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
56
pstrcpy(ifname, sizeof(ifr.ifr_name), ifr.ifr_name);
57
return 0;
58
}
59
+
60
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
61
+{
62
+ if (ioctl(fd, TUNSETSTEERINGEBPF, (void *) &prog_fd) != 0) {
63
+ error_report("Issue while setting TUNSETSTEERINGEBPF:"
64
+ " %s with fd: %d, prog_fd: %d",
65
+ strerror(errno), fd, prog_fd);
66
+
67
+ return -1;
45
+ }
68
+ }
46
+
69
+
47
/* Disable block migration */
70
+ return 0;
48
migrate_set_block_enabled(false, &local_err);
71
+}
49
qemu_savevm_state_header(fb);
72
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
50
@@ -XXX,XX +XXX,XX @@ out:
73
index XXXXXXX..XXXXXXX 100644
51
return ret;
74
--- a/net/tap-solaris.c
75
+++ b/net/tap-solaris.c
76
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
77
{
78
return -1;
52
}
79
}
53
80
+
54
+static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
81
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
55
+{
82
+{
56
+ colo_checkpoint_notify(data);
83
+ return -1;
84
+}
85
diff --git a/net/tap-stub.c b/net/tap-stub.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/net/tap-stub.c
88
+++ b/net/tap-stub.c
89
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
90
{
91
return -1;
92
}
93
+
94
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
95
+{
96
+ return -1;
97
+}
98
diff --git a/net/tap.c b/net/tap.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/net/tap.c
101
+++ b/net/tap.c
102
@@ -XXX,XX +XXX,XX @@ static void tap_poll(NetClientState *nc, bool enable)
103
tap_write_poll(s, enable);
104
}
105
106
+static bool tap_set_steering_ebpf(NetClientState *nc, int prog_fd)
107
+{
108
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
109
+ assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
110
+
111
+ return tap_fd_set_steering_ebpf(s->fd, prog_fd) == 0;
57
+}
112
+}
58
+
113
+
59
static void colo_process_checkpoint(MigrationState *s)
114
int tap_get_fd(NetClientState *nc)
60
{
115
{
61
QIOChannelBuffer *bioc;
116
TAPState *s = DO_UPCAST(TAPState, nc, nc);
62
@@ -XXX,XX +XXX,XX @@ static void colo_process_checkpoint(MigrationState *s)
117
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_tap_info = {
63
goto out;
118
.set_vnet_hdr_len = tap_set_vnet_hdr_len,
64
}
119
.set_vnet_le = tap_set_vnet_le,
65
120
.set_vnet_be = tap_set_vnet_be,
66
+ packets_compare_notifier.notify = colo_compare_notify_checkpoint;
121
+ .set_steering_ebpf = tap_set_steering_ebpf,
67
+ colo_compare_register_notifier(&packets_compare_notifier);
122
};
68
+
123
69
/*
124
static TAPState *net_tap_fd_init(NetClientState *peer,
70
* Wait for Secondary finish loading VM states and enter COLO
125
diff --git a/net/tap_int.h b/net/tap_int.h
71
* restore.
72
@@ -XXX,XX +XXX,XX @@ out:
73
qemu_fclose(fb);
74
}
75
76
- timer_del(s->colo_delay_timer);
77
-
78
/* Hope this not to be too long to wait here */
79
qemu_sem_wait(&s->colo_exit_sem);
80
qemu_sem_destroy(&s->colo_exit_sem);
81
+
82
+ /*
83
+ * It is safe to unregister notifier after failover finished.
84
+ * Besides, colo_delay_timer and colo_checkpoint_sem can't be
85
+ * released befor unregister notifier, or there will be use-after-free
86
+ * error.
87
+ */
88
+ colo_compare_unregister_notifier(&packets_compare_notifier);
89
+ timer_del(s->colo_delay_timer);
90
+ timer_free(s->colo_delay_timer);
91
+ qemu_sem_destroy(&s->colo_checkpoint_sem);
92
+
93
/*
94
* Must be called after failover BH is completed,
95
* Or the failover BH may shutdown the wrong fd that
96
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
97
fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
98
object_unref(OBJECT(bioc));
99
100
+ qemu_mutex_lock_iothread();
101
+ vm_start();
102
+ trace_colo_vm_state_change("stop", "run");
103
+ qemu_mutex_unlock_iothread();
104
+
105
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
106
&local_err);
107
if (local_err) {
108
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
109
goto out;
110
}
111
112
+ qemu_mutex_lock_iothread();
113
+ vm_stop_force_state(RUN_STATE_COLO);
114
+ trace_colo_vm_state_change("run", "stop");
115
+ qemu_mutex_unlock_iothread();
116
+
117
/* FIXME: This is unnecessary for periodic checkpoint mode */
118
colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
119
&local_err);
120
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
121
}
122
123
vmstate_loading = false;
124
+ vm_start();
125
+ trace_colo_vm_state_change("stop", "run");
126
qemu_mutex_unlock_iothread();
127
128
if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
129
diff --git a/migration/migration.c b/migration/migration.c
130
index XXXXXXX..XXXXXXX 100644
126
index XXXXXXX..XXXXXXX 100644
131
--- a/migration/migration.c
127
--- a/net/tap_int.h
132
+++ b/migration/migration.c
128
+++ b/net/tap_int.h
133
@@ -XXX,XX +XXX,XX @@
129
@@ -XXX,XX +XXX,XX @@ int tap_fd_set_vnet_be(int fd, int vnet_is_be);
134
/* Migration XBZRLE default cache size */
130
int tap_fd_enable(int fd);
135
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)
131
int tap_fd_disable(int fd);
136
132
int tap_fd_get_ifname(int fd, char *ifname);
137
-/* The delay time (in ms) between two COLO checkpoints
133
+int tap_fd_set_steering_ebpf(int fd, int prog_fd);
138
- * Note: Please change this default value to 10000 when we support hybrid mode.
134
139
- */
135
#endif /* NET_TAP_INT_H */
140
-#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
141
+/* The delay time (in ms) between two COLO checkpoints */
142
+#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
143
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
144
#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16
145
146
--
136
--
147
2.17.1
137
2.7.4
148
138
149
139
diff view generated by jsdifflib
1
From: Zhang Chen <zhangckid@gmail.com>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
Filter needs to process the event of checkpoint/failover or
3
RSS program and Makefile to build it.
4
other events passed by the COLO frame.
4
The bpftool is used to generate the '.h' file.
5
The data in that file may be loaded by libbpf.
6
EBPF compilation is not required for building qemu.
7
You can use Makefile if you need to regenerate rss.bpf.skeleton.h.
5
8
6
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
9
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
7
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
10
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
12
---
11
include/net/filter.h | 5 +++++
13
tools/ebpf/Makefile.ebpf | 21 ++
12
net/filter.c | 17 +++++++++++++++++
14
tools/ebpf/rss.bpf.c | 571 +++++++++++++++++++++++++++++++++++++++++++++++
13
net/net.c | 19 +++++++++++++++++++
15
2 files changed, 592 insertions(+)
14
3 files changed, 41 insertions(+)
16
create mode 100755 tools/ebpf/Makefile.ebpf
17
create mode 100644 tools/ebpf/rss.bpf.c
15
18
16
diff --git a/include/net/filter.h b/include/net/filter.h
19
diff --git a/tools/ebpf/Makefile.ebpf b/tools/ebpf/Makefile.ebpf
17
index XXXXXXX..XXXXXXX 100644
20
new file mode 100755
18
--- a/include/net/filter.h
21
index XXXXXXX..XXXXXXX
19
+++ b/include/net/filter.h
22
--- /dev/null
20
@@ -XXX,XX +XXX,XX @@ typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc,
23
+++ b/tools/ebpf/Makefile.ebpf
21
22
typedef void (FilterStatusChanged) (NetFilterState *nf, Error **errp);
23
24
+typedef void (FilterHandleEvent) (NetFilterState *nf, int event, Error **errp);
25
+
26
typedef struct NetFilterClass {
27
ObjectClass parent_class;
28
29
@@ -XXX,XX +XXX,XX @@ typedef struct NetFilterClass {
30
FilterSetup *setup;
31
FilterCleanup *cleanup;
32
FilterStatusChanged *status_changed;
33
+ FilterHandleEvent *handle_event;
34
/* mandatory */
35
FilterReceiveIOV *receive_iov;
36
} NetFilterClass;
37
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
38
int iovcnt,
39
void *opaque);
40
41
+void colo_notify_filters_event(int event, Error **errp);
42
+
43
#endif /* QEMU_NET_FILTER_H */
44
diff --git a/net/filter.c b/net/filter.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/net/filter.c
47
+++ b/net/filter.c
48
@@ -XXX,XX +XXX,XX @@
24
@@ -XXX,XX +XXX,XX @@
49
#include "net/vhost_net.h"
25
+OBJS = rss.bpf.o
50
#include "qom/object_interfaces.h"
26
+
51
#include "qemu/iov.h"
27
+LLC ?= llc
52
+#include "net/colo.h"
28
+CLANG ?= clang
53
+#include "migration/colo.h"
29
+INC_FLAGS = `$(CLANG) -print-file-name=include`
54
30
+EXTRA_CFLAGS ?= -O2 -emit-llvm -fno-stack-protector
55
static inline bool qemu_can_skip_netfilter(NetFilterState *nf)
31
+
56
{
32
+all: $(OBJS)
57
@@ -XXX,XX +XXX,XX @@ static void netfilter_finalize(Object *obj)
33
+
58
g_free(nf->netdev_id);
34
+.PHONY: clean
59
}
35
+
60
36
+clean:
61
+static void default_handle_event(NetFilterState *nf, int event, Error **errp)
37
+    rm -f $(OBJS)
38
+
39
+$(OBJS): %.o:%.c
40
+    $(CLANG) $(INC_FLAGS) \
41
+ -D__KERNEL__ -D__ASM_SYSREG_H \
42
+ -I../include $(LINUXINCLUDE) \
43
+ $(EXTRA_CFLAGS) -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
44
+    bpftool gen skeleton rss.bpf.o > rss.bpf.skeleton.h
45
+    cp rss.bpf.skeleton.h ../../ebpf/
46
diff --git a/tools/ebpf/rss.bpf.c b/tools/ebpf/rss.bpf.c
47
new file mode 100644
48
index XXXXXXX..XXXXXXX
49
--- /dev/null
50
+++ b/tools/ebpf/rss.bpf.c
51
@@ -XXX,XX +XXX,XX @@
52
+/*
53
+ * eBPF RSS program
54
+ *
55
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
56
+ *
57
+ * Authors:
58
+ * Andrew Melnychenko <andrew@daynix.com>
59
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
60
+ *
61
+ * This work is licensed under the terms of the GNU GPL, version 2. See
62
+ * the COPYING file in the top-level directory.
63
+ *
64
+ * Prepare:
65
+ * Requires llvm, clang, bpftool, linux kernel tree
66
+ *
67
+ * Build rss.bpf.skeleton.h:
68
+ * make -f Makefile.ebpf clean all
69
+ */
70
+
71
+#include <stddef.h>
72
+#include <stdbool.h>
73
+#include <linux/bpf.h>
74
+
75
+#include <linux/in.h>
76
+#include <linux/if_ether.h>
77
+#include <linux/ip.h>
78
+#include <linux/ipv6.h>
79
+
80
+#include <linux/udp.h>
81
+#include <linux/tcp.h>
82
+
83
+#include <bpf/bpf_helpers.h>
84
+#include <bpf/bpf_endian.h>
85
+#include <linux/virtio_net.h>
86
+
87
+#define INDIRECTION_TABLE_SIZE 128
88
+#define HASH_CALCULATION_BUFFER_SIZE 36
89
+
90
+struct rss_config_t {
91
+ __u8 redirect;
92
+ __u8 populate_hash;
93
+ __u32 hash_types;
94
+ __u16 indirections_len;
95
+ __u16 default_queue;
96
+} __attribute__((packed));
97
+
98
+struct toeplitz_key_data_t {
99
+ __u32 leftmost_32_bits;
100
+ __u8 next_byte[HASH_CALCULATION_BUFFER_SIZE];
101
+};
102
+
103
+struct packet_hash_info_t {
104
+ __u8 is_ipv4;
105
+ __u8 is_ipv6;
106
+ __u8 is_udp;
107
+ __u8 is_tcp;
108
+ __u8 is_ipv6_ext_src;
109
+ __u8 is_ipv6_ext_dst;
110
+ __u8 is_fragmented;
111
+
112
+ __u16 src_port;
113
+ __u16 dst_port;
114
+
115
+ union {
116
+ struct {
117
+ __be32 in_src;
118
+ __be32 in_dst;
119
+ };
120
+
121
+ struct {
122
+ struct in6_addr in6_src;
123
+ struct in6_addr in6_dst;
124
+ struct in6_addr in6_ext_src;
125
+ struct in6_addr in6_ext_dst;
126
+ };
127
+ };
128
+};
129
+
130
+struct bpf_map_def SEC("maps")
131
+tap_rss_map_configurations = {
132
+ .type = BPF_MAP_TYPE_ARRAY,
133
+ .key_size = sizeof(__u32),
134
+ .value_size = sizeof(struct rss_config_t),
135
+ .max_entries = 1,
136
+};
137
+
138
+struct bpf_map_def SEC("maps")
139
+tap_rss_map_toeplitz_key = {
140
+ .type = BPF_MAP_TYPE_ARRAY,
141
+ .key_size = sizeof(__u32),
142
+ .value_size = sizeof(struct toeplitz_key_data_t),
143
+ .max_entries = 1,
144
+};
145
+
146
+struct bpf_map_def SEC("maps")
147
+tap_rss_map_indirection_table = {
148
+ .type = BPF_MAP_TYPE_ARRAY,
149
+ .key_size = sizeof(__u32),
150
+ .value_size = sizeof(__u16),
151
+ .max_entries = INDIRECTION_TABLE_SIZE,
152
+};
153
+
154
+static inline void net_rx_rss_add_chunk(__u8 *rss_input, size_t *bytes_written,
155
+ const void *ptr, size_t size) {
156
+ __builtin_memcpy(&rss_input[*bytes_written], ptr, size);
157
+ *bytes_written += size;
158
+}
159
+
160
+static inline
161
+void net_toeplitz_add(__u32 *result,
162
+ __u8 *input,
163
+ __u32 len
164
+ , struct toeplitz_key_data_t *key) {
165
+
166
+ __u32 accumulator = *result;
167
+ __u32 leftmost_32_bits = key->leftmost_32_bits;
168
+ __u32 byte;
169
+
170
+ for (byte = 0; byte < HASH_CALCULATION_BUFFER_SIZE; byte++) {
171
+ __u8 input_byte = input[byte];
172
+ __u8 key_byte = key->next_byte[byte];
173
+ __u8 bit;
174
+
175
+ for (bit = 0; bit < 8; bit++) {
176
+ if (input_byte & (1 << 7)) {
177
+ accumulator ^= leftmost_32_bits;
178
+ }
179
+
180
+ leftmost_32_bits =
181
+ (leftmost_32_bits << 1) | ((key_byte & (1 << 7)) >> 7);
182
+
183
+ input_byte <<= 1;
184
+ key_byte <<= 1;
185
+ }
186
+ }
187
+
188
+ *result = accumulator;
189
+}
190
+
191
+
192
+static inline int ip6_extension_header_type(__u8 hdr_type)
62
+{
193
+{
63
+ switch (event) {
194
+ switch (hdr_type) {
64
+ case COLO_EVENT_CHECKPOINT:
195
+ case IPPROTO_HOPOPTS:
65
+ break;
196
+ case IPPROTO_ROUTING:
66
+ case COLO_EVENT_FAILOVER:
197
+ case IPPROTO_FRAGMENT:
67
+ object_property_set_str(OBJECT(nf), "off", "status", errp);
198
+ case IPPROTO_ICMPV6:
68
+ break;
199
+ case IPPROTO_NONE:
200
+ case IPPROTO_DSTOPTS:
201
+ case IPPROTO_MH:
202
+ return 1;
203
+ default:
204
+ return 0;
205
+ }
206
+}
207
+/*
208
+ * According to
209
+ * https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml
210
+ * we expect that there would be no more than 11 extensions in an IPv6 header,
211
+ * also there are 27 TLV options for Destination and Hop-by-hop extensions.
212
+ * Need to choose a reasonable maximum number of extensions/options we may
213
+ * check to find ext src/dst.
214
+ */
215
+#define IP6_EXTENSIONS_COUNT 11
216
+#define IP6_OPTIONS_COUNT 30
217
+
218
+static inline int parse_ipv6_ext(struct __sk_buff *skb,
219
+ struct packet_hash_info_t *info,
220
+ __u8 *l4_protocol, size_t *l4_offset)
221
+{
222
+ int err = 0;
223
+
224
+ if (!ip6_extension_header_type(*l4_protocol)) {
225
+ return 0;
226
+ }
227
+
228
+ struct ipv6_opt_hdr ext_hdr = {};
229
+
230
+ for (unsigned int i = 0; i < IP6_EXTENSIONS_COUNT; ++i) {
231
+
232
+ err = bpf_skb_load_bytes_relative(skb, *l4_offset, &ext_hdr,
233
+ sizeof(ext_hdr), BPF_HDR_START_NET);
234
+ if (err) {
235
+ goto error;
236
+ }
237
+
238
+ if (*l4_protocol == IPPROTO_ROUTING) {
239
+ struct ipv6_rt_hdr ext_rt = {};
240
+
241
+ err = bpf_skb_load_bytes_relative(skb, *l4_offset, &ext_rt,
242
+ sizeof(ext_rt), BPF_HDR_START_NET);
243
+ if (err) {
244
+ goto error;
245
+ }
246
+
247
+ if ((ext_rt.type == IPV6_SRCRT_TYPE_2) &&
248
+ (ext_rt.hdrlen == sizeof(struct in6_addr) / 8) &&
249
+ (ext_rt.segments_left == 1)) {
250
+
251
+ err = bpf_skb_load_bytes_relative(skb,
252
+ *l4_offset + offsetof(struct rt2_hdr, addr),
253
+ &info->in6_ext_dst, sizeof(info->in6_ext_dst),
254
+ BPF_HDR_START_NET);
255
+ if (err) {
256
+ goto error;
257
+ }
258
+
259
+ info->is_ipv6_ext_dst = 1;
260
+ }
261
+
262
+ } else if (*l4_protocol == IPPROTO_DSTOPTS) {
263
+ struct ipv6_opt_t {
264
+ __u8 type;
265
+ __u8 length;
266
+ } __attribute__((packed)) opt = {};
267
+
268
+ size_t opt_offset = sizeof(ext_hdr);
269
+
270
+ for (unsigned int j = 0; j < IP6_OPTIONS_COUNT; ++j) {
271
+ err = bpf_skb_load_bytes_relative(skb, *l4_offset + opt_offset,
272
+ &opt, sizeof(opt), BPF_HDR_START_NET);
273
+ if (err) {
274
+ goto error;
275
+ }
276
+
277
+ if (opt.type == IPV6_TLV_HAO) {
278
+ err = bpf_skb_load_bytes_relative(skb,
279
+ *l4_offset + opt_offset
280
+ + offsetof(struct ipv6_destopt_hao, addr),
281
+ &info->in6_ext_src, sizeof(info->in6_ext_src),
282
+ BPF_HDR_START_NET);
283
+ if (err) {
284
+ goto error;
285
+ }
286
+
287
+ info->is_ipv6_ext_src = 1;
288
+ break;
289
+ }
290
+
291
+ opt_offset += (opt.type == IPV6_TLV_PAD1) ?
292
+ 1 : opt.length + sizeof(opt);
293
+
294
+ if (opt_offset + 1 >= ext_hdr.hdrlen * 8) {
295
+ break;
296
+ }
297
+ }
298
+ } else if (*l4_protocol == IPPROTO_FRAGMENT) {
299
+ info->is_fragmented = true;
300
+ }
301
+
302
+ *l4_protocol = ext_hdr.nexthdr;
303
+ *l4_offset += (ext_hdr.hdrlen + 1) * 8;
304
+
305
+ if (!ip6_extension_header_type(ext_hdr.nexthdr)) {
306
+ return 0;
307
+ }
308
+ }
309
+
310
+ return 0;
311
+error:
312
+ return err;
313
+}
314
+
315
+static __be16 parse_eth_type(struct __sk_buff *skb)
316
+{
317
+ unsigned int offset = 12;
318
+ __be16 ret = 0;
319
+ int err = 0;
320
+
321
+ err = bpf_skb_load_bytes_relative(skb, offset, &ret, sizeof(ret),
322
+ BPF_HDR_START_MAC);
323
+ if (err) {
324
+ return 0;
325
+ }
326
+
327
+ switch (bpf_ntohs(ret)) {
328
+ case ETH_P_8021AD:
329
+ offset += 4;
330
+ case ETH_P_8021Q:
331
+ offset += 4;
332
+ err = bpf_skb_load_bytes_relative(skb, offset, &ret, sizeof(ret),
333
+ BPF_HDR_START_MAC);
69
+ default:
334
+ default:
70
+ break;
335
+ break;
71
+ }
336
+ }
72
+}
337
+
73
+
338
+ if (err) {
74
static void netfilter_class_init(ObjectClass *oc, void *data)
339
+ return 0;
75
{
340
+ }
76
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
341
+
77
+ NetFilterClass *nfc = NETFILTER_CLASS(oc);
342
+ return ret;
78
343
+}
79
ucc->complete = netfilter_complete;
344
+
80
+ nfc->handle_event = default_handle_event;
345
+static inline int parse_packet(struct __sk_buff *skb,
81
}
346
+ struct packet_hash_info_t *info)
82
83
static const TypeInfo netfilter_info = {
84
diff --git a/net/net.c b/net/net.c
85
index XXXXXXX..XXXXXXX 100644
86
--- a/net/net.c
87
+++ b/net/net.c
88
@@ -XXX,XX +XXX,XX @@ void hmp_info_network(Monitor *mon, const QDict *qdict)
89
}
90
}
91
92
+void colo_notify_filters_event(int event, Error **errp)
93
+{
347
+{
94
+ NetClientState *nc;
348
+ int err = 0;
95
+ NetFilterState *nf;
349
+
96
+ NetFilterClass *nfc = NULL;
350
+ if (!info || !skb) {
97
+ Error *local_err = NULL;
351
+ return -1;
98
+
352
+ }
99
+ QTAILQ_FOREACH(nc, &net_clients, next) {
353
+
100
+ QTAILQ_FOREACH(nf, &nc->filters, next) {
354
+ size_t l4_offset = 0;
101
+ nfc = NETFILTER_GET_CLASS(OBJECT(nf));
355
+ __u8 l4_protocol = 0;
102
+ nfc->handle_event(nf, event, &local_err);
356
+ __u16 l3_protocol = bpf_ntohs(parse_eth_type(skb));
103
+ if (local_err) {
357
+ if (l3_protocol == 0) {
104
+ error_propagate(errp, local_err);
358
+ err = -1;
105
+ return;
359
+ goto error;
106
+ }
360
+ }
107
+ }
361
+
108
+ }
362
+ if (l3_protocol == ETH_P_IP) {
109
+}
363
+ info->is_ipv4 = 1;
110
+
364
+
111
void qmp_set_link(const char *name, bool up, Error **errp)
365
+ struct iphdr ip = {};
112
{
366
+ err = bpf_skb_load_bytes_relative(skb, 0, &ip, sizeof(ip),
113
NetClientState *ncs[MAX_QUEUE_NUM];
367
+ BPF_HDR_START_NET);
368
+ if (err) {
369
+ goto error;
370
+ }
371
+
372
+ info->in_src = ip.saddr;
373
+ info->in_dst = ip.daddr;
374
+ info->is_fragmented = !!ip.frag_off;
375
+
376
+ l4_protocol = ip.protocol;
377
+ l4_offset = ip.ihl * 4;
378
+ } else if (l3_protocol == ETH_P_IPV6) {
379
+ info->is_ipv6 = 1;
380
+
381
+ struct ipv6hdr ip6 = {};
382
+ err = bpf_skb_load_bytes_relative(skb, 0, &ip6, sizeof(ip6),
383
+ BPF_HDR_START_NET);
384
+ if (err) {
385
+ goto error;
386
+ }
387
+
388
+ info->in6_src = ip6.saddr;
389
+ info->in6_dst = ip6.daddr;
390
+
391
+ l4_protocol = ip6.nexthdr;
392
+ l4_offset = sizeof(ip6);
393
+
394
+ err = parse_ipv6_ext(skb, info, &l4_protocol, &l4_offset);
395
+ if (err) {
396
+ goto error;
397
+ }
398
+ }
399
+
400
+ if (l4_protocol != 0 && !info->is_fragmented) {
401
+ if (l4_protocol == IPPROTO_TCP) {
402
+ info->is_tcp = 1;
403
+
404
+ struct tcphdr tcp = {};
405
+ err = bpf_skb_load_bytes_relative(skb, l4_offset, &tcp, sizeof(tcp),
406
+ BPF_HDR_START_NET);
407
+ if (err) {
408
+ goto error;
409
+ }
410
+
411
+ info->src_port = tcp.source;
412
+ info->dst_port = tcp.dest;
413
+ } else if (l4_protocol == IPPROTO_UDP) { /* TODO: add udplite? */
414
+ info->is_udp = 1;
415
+
416
+ struct udphdr udp = {};
417
+ err = bpf_skb_load_bytes_relative(skb, l4_offset, &udp, sizeof(udp),
418
+ BPF_HDR_START_NET);
419
+ if (err) {
420
+ goto error;
421
+ }
422
+
423
+ info->src_port = udp.source;
424
+ info->dst_port = udp.dest;
425
+ }
426
+ }
427
+
428
+ return 0;
429
+
430
+error:
431
+ return err;
432
+}
433
+
434
+static inline __u32 calculate_rss_hash(struct __sk_buff *skb,
435
+ struct rss_config_t *config, struct toeplitz_key_data_t *toe)
436
+{
437
+ __u8 rss_input[HASH_CALCULATION_BUFFER_SIZE] = {};
438
+ size_t bytes_written = 0;
439
+ __u32 result = 0;
440
+ int err = 0;
441
+ struct packet_hash_info_t packet_info = {};
442
+
443
+ err = parse_packet(skb, &packet_info);
444
+ if (err) {
445
+ return 0;
446
+ }
447
+
448
+ if (packet_info.is_ipv4) {
449
+ if (packet_info.is_tcp &&
450
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
451
+
452
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
453
+ &packet_info.in_src,
454
+ sizeof(packet_info.in_src));
455
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
456
+ &packet_info.in_dst,
457
+ sizeof(packet_info.in_dst));
458
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
459
+ &packet_info.src_port,
460
+ sizeof(packet_info.src_port));
461
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
462
+ &packet_info.dst_port,
463
+ sizeof(packet_info.dst_port));
464
+ } else if (packet_info.is_udp &&
465
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
466
+
467
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
468
+ &packet_info.in_src,
469
+ sizeof(packet_info.in_src));
470
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
471
+ &packet_info.in_dst,
472
+ sizeof(packet_info.in_dst));
473
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
474
+ &packet_info.src_port,
475
+ sizeof(packet_info.src_port));
476
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
477
+ &packet_info.dst_port,
478
+ sizeof(packet_info.dst_port));
479
+ } else if (config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
480
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
481
+ &packet_info.in_src,
482
+ sizeof(packet_info.in_src));
483
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
484
+ &packet_info.in_dst,
485
+ sizeof(packet_info.in_dst));
486
+ }
487
+ } else if (packet_info.is_ipv6) {
488
+ if (packet_info.is_tcp &&
489
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
490
+
491
+ if (packet_info.is_ipv6_ext_src &&
492
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
493
+
494
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
495
+ &packet_info.in6_ext_src,
496
+ sizeof(packet_info.in6_ext_src));
497
+ } else {
498
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
499
+ &packet_info.in6_src,
500
+ sizeof(packet_info.in6_src));
501
+ }
502
+ if (packet_info.is_ipv6_ext_dst &&
503
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
504
+
505
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
506
+ &packet_info.in6_ext_dst,
507
+ sizeof(packet_info.in6_ext_dst));
508
+ } else {
509
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
510
+ &packet_info.in6_dst,
511
+ sizeof(packet_info.in6_dst));
512
+ }
513
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
514
+ &packet_info.src_port,
515
+ sizeof(packet_info.src_port));
516
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
517
+ &packet_info.dst_port,
518
+ sizeof(packet_info.dst_port));
519
+ } else if (packet_info.is_udp &&
520
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
521
+
522
+ if (packet_info.is_ipv6_ext_src &&
523
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
524
+
525
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
526
+ &packet_info.in6_ext_src,
527
+ sizeof(packet_info.in6_ext_src));
528
+ } else {
529
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
530
+ &packet_info.in6_src,
531
+ sizeof(packet_info.in6_src));
532
+ }
533
+ if (packet_info.is_ipv6_ext_dst &&
534
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
535
+
536
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
537
+ &packet_info.in6_ext_dst,
538
+ sizeof(packet_info.in6_ext_dst));
539
+ } else {
540
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
541
+ &packet_info.in6_dst,
542
+ sizeof(packet_info.in6_dst));
543
+ }
544
+
545
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
546
+ &packet_info.src_port,
547
+ sizeof(packet_info.src_port));
548
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
549
+ &packet_info.dst_port,
550
+ sizeof(packet_info.dst_port));
551
+
552
+ } else if (config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
553
+ if (packet_info.is_ipv6_ext_src &&
554
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
555
+
556
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
557
+ &packet_info.in6_ext_src,
558
+ sizeof(packet_info.in6_ext_src));
559
+ } else {
560
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
561
+ &packet_info.in6_src,
562
+ sizeof(packet_info.in6_src));
563
+ }
564
+ if (packet_info.is_ipv6_ext_dst &&
565
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
566
+
567
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
568
+ &packet_info.in6_ext_dst,
569
+ sizeof(packet_info.in6_ext_dst));
570
+ } else {
571
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
572
+ &packet_info.in6_dst,
573
+ sizeof(packet_info.in6_dst));
574
+ }
575
+ }
576
+ }
577
+
578
+ if (bytes_written) {
579
+ net_toeplitz_add(&result, rss_input, bytes_written, toe);
580
+ }
581
+
582
+ return result;
583
+}
584
+
585
+SEC("tun_rss_steering")
586
+int tun_rss_steering_prog(struct __sk_buff *skb)
587
+{
588
+
589
+ struct rss_config_t *config;
590
+ struct toeplitz_key_data_t *toe;
591
+
592
+ __u32 key = 0;
593
+ __u32 hash = 0;
594
+
595
+ config = bpf_map_lookup_elem(&tap_rss_map_configurations, &key);
596
+ toe = bpf_map_lookup_elem(&tap_rss_map_toeplitz_key, &key);
597
+
598
+ if (config && toe) {
599
+ if (!config->redirect) {
600
+ return config->default_queue;
601
+ }
602
+
603
+ hash = calculate_rss_hash(skb, config, toe);
604
+ if (hash) {
605
+ __u32 table_idx = hash % config->indirections_len;
606
+ __u16 *queue = 0;
607
+
608
+ queue = bpf_map_lookup_elem(&tap_rss_map_indirection_table,
609
+ &table_idx);
610
+
611
+ if (queue) {
612
+ return *queue;
613
+ }
614
+ }
615
+
616
+ return config->default_queue;
617
+ }
618
+
619
+ return -1;
620
+}
621
+
622
+char _license[] SEC("license") = "GPL v2";
114
--
623
--
115
2.17.1
624
2.7.4
116
625
117
626
diff view generated by jsdifflib
1
From: Zhang Chen <zhangckid@gmail.com>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
While doing a checkpoint, we need to flush all the unhandled packets.
3
Added function that loads RSS eBPF program.
4
By using the filter notifier mechanism, we can easily notify
4
Added stub functions for RSS eBPF loader.
5
every compare object to do this process, which runs inside
5
Added meson and configuration options.
6
of compare threads as a coroutine.
7
6
8
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
7
By default, the eBPF feature is enabled if libbpf is present in the build system.
9
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
8
libbpf is checked for in the configuration shell script and the meson script.
10
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
10
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
11
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
---
13
include/migration/colo.h | 6 ++++
14
configure | 8 +-
14
net/colo-compare.c | 78 ++++++++++++++++++++++++++++++++++++++++
15
ebpf/ebpf_rss-stub.c | 40 +++++
15
net/colo-compare.h | 22 ++++++++++++
16
ebpf/ebpf_rss.c | 165 ++++++++++++++++++
16
3 files changed, 106 insertions(+)
17
ebpf/ebpf_rss.h | 44 +++++
17
create mode 100644 net/colo-compare.h
18
ebpf/meson.build | 1 +
19
ebpf/rss.bpf.skeleton.h | 431 ++++++++++++++++++++++++++++++++++++++++++++++++
20
ebpf/trace-events | 4 +
21
ebpf/trace.h | 1 +
22
meson.build | 23 +++
23
meson_options.txt | 2 +
24
10 files changed, 718 insertions(+), 1 deletion(-)
25
create mode 100644 ebpf/ebpf_rss-stub.c
26
create mode 100644 ebpf/ebpf_rss.c
27
create mode 100644 ebpf/ebpf_rss.h
28
create mode 100644 ebpf/meson.build
29
create mode 100644 ebpf/rss.bpf.skeleton.h
30
create mode 100644 ebpf/trace-events
31
create mode 100644 ebpf/trace.h
18
32
19
diff --git a/include/migration/colo.h b/include/migration/colo.h
33
diff --git a/configure b/configure
20
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100755
21
--- a/include/migration/colo.h
35
--- a/configure
22
+++ b/include/migration/colo.h
36
+++ b/configure
23
@@ -XXX,XX +XXX,XX @@
37
@@ -XXX,XX +XXX,XX @@ vhost_vsock="$default_feature"
24
#include "qemu-common.h"
38
vhost_user="no"
25
#include "qapi/qapi-types-migration.h"
39
vhost_user_blk_server="auto"
26
40
vhost_user_fs="$default_feature"
27
+enum colo_event {
41
+bpf="auto"
28
+ COLO_EVENT_NONE,
42
kvm="auto"
29
+ COLO_EVENT_CHECKPOINT,
43
hax="auto"
30
+ COLO_EVENT_FAILOVER,
44
hvf="auto"
31
+};
45
@@ -XXX,XX +XXX,XX @@ for opt do
32
+
46
;;
33
void colo_info_init(void);
47
--enable-membarrier) membarrier="yes"
34
48
;;
35
void migrate_start_colo_process(MigrationState *s);
49
+ --disable-bpf) bpf="disabled"
36
diff --git a/net/colo-compare.c b/net/colo-compare.c
50
+ ;;
37
index XXXXXXX..XXXXXXX 100644
51
+ --enable-bpf) bpf="enabled"
38
--- a/net/colo-compare.c
52
+ ;;
39
+++ b/net/colo-compare.c
53
--disable-blobs) blobs="false"
40
@@ -XXX,XX +XXX,XX @@
54
;;
41
#include "qemu/sockets.h"
55
--with-pkgversion=*) pkgversion="$optarg"
42
#include "colo.h"
56
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available
43
#include "sysemu/iothread.h"
57
vhost-user vhost-user backend support
44
+#include "net/colo-compare.h"
58
vhost-user-blk-server vhost-user-blk server support
45
+#include "migration/colo.h"
59
vhost-vdpa vhost-vdpa kernel backend support
46
60
+ bpf BPF kernel support
47
#define TYPE_COLO_COMPARE "colo-compare"
61
spice spice
48
#define COLO_COMPARE(obj) \
62
spice-protocol spice-protocol
49
OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
63
rbd rados block device (rbd)
50
64
@@ -XXX,XX +XXX,XX @@ if test "$skip_meson" = no; then
51
+static QTAILQ_HEAD(, CompareState) net_compares =
65
-Dattr=$attr -Ddefault_devices=$default_devices \
52
+ QTAILQ_HEAD_INITIALIZER(net_compares);
66
-Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \
53
+
67
-Dvhost_user_blk_server=$vhost_user_blk_server -Dmultiprocess=$multiprocess \
54
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
68
- -Dfuse=$fuse -Dfuse_lseek=$fuse_lseek -Dguest_agent_msi=$guest_agent_msi \
55
#define MAX_QUEUE_SIZE 1024
69
+ -Dfuse=$fuse -Dfuse_lseek=$fuse_lseek -Dguest_agent_msi=$guest_agent_msi -Dbpf=$bpf\
56
70
$(if test "$default_features" = no; then echo "-Dauto_features=disabled"; fi) \
57
@@ -XXX,XX +XXX,XX @@
71
    -Dtcg_interpreter=$tcg_interpreter \
58
/* TODO: Should be configurable */
72
$cross_arg \
59
#define REGULAR_PACKET_CHECK_MS 3000
73
diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c
60
61
+static QemuMutex event_mtx;
62
+static QemuCond event_complete_cond;
63
+static int event_unhandled_count;
64
+
65
/*
66
* + CompareState ++
67
* | |
68
@@ -XXX,XX +XXX,XX @@ typedef struct CompareState {
69
IOThread *iothread;
70
GMainContext *worker_context;
71
QEMUTimer *packet_check_timer;
72
+
73
+ QEMUBH *event_bh;
74
+ enum colo_event event;
75
+
76
+ QTAILQ_ENTRY(CompareState) next;
77
} CompareState;
78
79
typedef struct CompareClass {
80
@@ -XXX,XX +XXX,XX @@ static void check_old_packet_regular(void *opaque)
81
REGULAR_PACKET_CHECK_MS);
82
}
83
84
+/* Public API, used by the COLO framework to notify a compare event */
85
+void colo_notify_compares_event(void *opaque, int event, Error **errp)
86
+{
87
+ CompareState *s;
88
+
89
+ qemu_mutex_lock(&event_mtx);
90
+ QTAILQ_FOREACH(s, &net_compares, next) {
91
+ s->event = event;
92
+ qemu_bh_schedule(s->event_bh);
93
+ event_unhandled_count++;
94
+ }
95
+ /* Wait for all compare threads to finish handling this event */
96
+ while (event_unhandled_count > 0) {
97
+ qemu_cond_wait(&event_complete_cond, &event_mtx);
98
+ }
99
+
100
+ qemu_mutex_unlock(&event_mtx);
101
+}
102
+
103
static void colo_compare_timer_init(CompareState *s)
104
{
105
AioContext *ctx = iothread_get_aio_context(s->iothread);
106
@@ -XXX,XX +XXX,XX @@ static void colo_compare_timer_del(CompareState *s)
107
}
108
}
109
110
+static void colo_flush_packets(void *opaque, void *user_data);
111
+
112
+static void colo_compare_handle_event(void *opaque)
113
+{
114
+ CompareState *s = opaque;
115
+
116
+ switch (s->event) {
117
+ case COLO_EVENT_CHECKPOINT:
118
+ g_queue_foreach(&s->conn_list, colo_flush_packets, s);
119
+ break;
120
+ case COLO_EVENT_FAILOVER:
121
+ break;
122
+ default:
123
+ break;
124
+ }
125
+
126
+ assert(event_unhandled_count > 0);
127
+
128
+ qemu_mutex_lock(&event_mtx);
129
+ event_unhandled_count--;
130
+ qemu_cond_broadcast(&event_complete_cond);
131
+ qemu_mutex_unlock(&event_mtx);
132
+}
133
+
134
static void colo_compare_iothread(CompareState *s)
135
{
136
object_ref(OBJECT(s->iothread));
137
@@ -XXX,XX +XXX,XX @@ static void colo_compare_iothread(CompareState *s)
138
s, s->worker_context, true);
139
140
colo_compare_timer_init(s);
141
+ s->event_bh = qemu_bh_new(colo_compare_handle_event, s);
142
}
143
144
static char *compare_get_pri_indev(Object *obj, Error **errp)
145
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
146
net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
147
net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
148
149
+ QTAILQ_INSERT_TAIL(&net_compares, s, next);
150
+
151
g_queue_init(&s->conn_list);
152
153
+ qemu_mutex_init(&event_mtx);
154
+ qemu_cond_init(&event_complete_cond);
155
+
156
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
157
connection_key_equal,
158
g_free,
159
@@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj)
160
static void colo_compare_finalize(Object *obj)
161
{
162
CompareState *s = COLO_COMPARE(obj);
163
+ CompareState *tmp = NULL;
164
165
qemu_chr_fe_deinit(&s->chr_pri_in, false);
166
qemu_chr_fe_deinit(&s->chr_sec_in, false);
167
@@ -XXX,XX +XXX,XX @@ static void colo_compare_finalize(Object *obj)
168
if (s->iothread) {
169
colo_compare_timer_del(s);
170
}
171
+
172
+ qemu_bh_delete(s->event_bh);
173
+
174
+ QTAILQ_FOREACH(tmp, &net_compares, next) {
175
+ if (tmp == s) {
176
+ QTAILQ_REMOVE(&net_compares, s, next);
177
+ break;
178
+ }
179
+ }
180
+
181
/* Release all unhandled packets after compare thead exited */
182
g_queue_foreach(&s->conn_list, colo_flush_packets, s);
183
184
@@ -XXX,XX +XXX,XX @@ static void colo_compare_finalize(Object *obj)
185
if (s->iothread) {
186
object_unref(OBJECT(s->iothread));
187
}
188
+
189
+ qemu_mutex_destroy(&event_mtx);
190
+ qemu_cond_destroy(&event_complete_cond);
191
+
192
g_free(s->pri_indev);
193
g_free(s->sec_indev);
194
g_free(s->outdev);
195
diff --git a/net/colo-compare.h b/net/colo-compare.h
196
new file mode 100644
74
new file mode 100644
197
index XXXXXXX..XXXXXXX
75
index XXXXXXX..XXXXXXX
198
--- /dev/null
76
--- /dev/null
199
+++ b/net/colo-compare.h
77
+++ b/ebpf/ebpf_rss-stub.c
200
@@ -XXX,XX +XXX,XX @@
78
@@ -XXX,XX +XXX,XX @@
201
+/*
79
+/*
202
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
80
+ * eBPF RSS stub file
203
+ * (a.k.a. Fault Tolerance or Continuous Replication)
204
+ *
81
+ *
205
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
82
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
206
+ * Copyright (c) 2017 FUJITSU LIMITED
207
+ * Copyright (c) 2017 Intel Corporation
208
+ *
83
+ *
209
+ * Authors:
84
+ * Authors:
210
+ * zhanghailiang <zhang.zhanghailiang@huawei.com>
85
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
211
+ * Zhang Chen <zhangckid@gmail.com>
212
+ *
86
+ *
213
+ * This work is licensed under the terms of the GNU GPL, version 2 or
87
+ * This work is licensed under the terms of the GNU GPL, version 2. See
214
+ * later. See the COPYING file in the top-level directory.
88
+ * the COPYING file in the top-level directory.
215
+ */
89
+ */
216
+
90
+
217
+#ifndef QEMU_COLO_COMPARE_H
91
+#include "qemu/osdep.h"
218
+#define QEMU_COLO_COMPARE_H
92
+#include "ebpf/ebpf_rss.h"
219
+
93
+
220
+void colo_notify_compares_event(void *opaque, int event, Error **errp);
94
+void ebpf_rss_init(struct EBPFRSSContext *ctx)
221
+
95
+{
222
+#endif /* QEMU_COLO_COMPARE_H */
96
+
97
+}
98
+
99
+bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
100
+{
101
+ return false;
102
+}
103
+
104
+bool ebpf_rss_load(struct EBPFRSSContext *ctx)
105
+{
106
+ return false;
107
+}
108
+
109
+bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
110
+ uint16_t *indirections_table, uint8_t *toeplitz_key)
111
+{
112
+ return false;
113
+}
114
+
115
+void ebpf_rss_unload(struct EBPFRSSContext *ctx)
116
+{
117
+
118
+}
119
diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
120
new file mode 100644
121
index XXXXXXX..XXXXXXX
122
--- /dev/null
123
+++ b/ebpf/ebpf_rss.c
124
@@ -XXX,XX +XXX,XX @@
125
+/*
126
+ * eBPF RSS loader
127
+ *
128
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
129
+ *
130
+ * Authors:
131
+ * Andrew Melnychenko <andrew@daynix.com>
132
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
133
+ *
134
+ * This work is licensed under the terms of the GNU GPL, version 2. See
135
+ * the COPYING file in the top-level directory.
136
+ */
137
+
138
+#include "qemu/osdep.h"
139
+#include "qemu/error-report.h"
140
+
141
+#include <bpf/libbpf.h>
142
+#include <bpf/bpf.h>
143
+
144
+#include "hw/virtio/virtio-net.h" /* VIRTIO_NET_RSS_MAX_TABLE_LEN */
145
+
146
+#include "ebpf/ebpf_rss.h"
147
+#include "ebpf/rss.bpf.skeleton.h"
148
+#include "trace.h"
149
+
150
+void ebpf_rss_init(struct EBPFRSSContext *ctx)
151
+{
152
+ if (ctx != NULL) {
153
+ ctx->obj = NULL;
154
+ }
155
+}
156
+
157
+bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
158
+{
159
+ return ctx != NULL && ctx->obj != NULL;
160
+}
161
+
162
+bool ebpf_rss_load(struct EBPFRSSContext *ctx)
163
+{
164
+ struct rss_bpf *rss_bpf_ctx;
165
+
166
+ if (ctx == NULL) {
167
+ return false;
168
+ }
169
+
170
+ rss_bpf_ctx = rss_bpf__open();
171
+ if (rss_bpf_ctx == NULL) {
172
+ trace_ebpf_error("eBPF RSS", "can not open eBPF RSS object");
173
+ goto error;
174
+ }
175
+
176
+ bpf_program__set_socket_filter(rss_bpf_ctx->progs.tun_rss_steering_prog);
177
+
178
+ if (rss_bpf__load(rss_bpf_ctx)) {
179
+ trace_ebpf_error("eBPF RSS", "can not load RSS program");
180
+ goto error;
181
+ }
182
+
183
+ ctx->obj = rss_bpf_ctx;
184
+ ctx->program_fd = bpf_program__fd(
185
+ rss_bpf_ctx->progs.tun_rss_steering_prog);
186
+ ctx->map_configuration = bpf_map__fd(
187
+ rss_bpf_ctx->maps.tap_rss_map_configurations);
188
+ ctx->map_indirections_table = bpf_map__fd(
189
+ rss_bpf_ctx->maps.tap_rss_map_indirection_table);
190
+ ctx->map_toeplitz_key = bpf_map__fd(
191
+ rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
192
+
193
+ return true;
194
+error:
195
+ rss_bpf__destroy(rss_bpf_ctx);
196
+ ctx->obj = NULL;
197
+
198
+ return false;
199
+}
200
+
201
+static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
202
+ struct EBPFRSSConfig *config)
203
+{
204
+ uint32_t map_key = 0;
205
+
206
+ if (!ebpf_rss_is_loaded(ctx)) {
207
+ return false;
208
+ }
209
+ if (bpf_map_update_elem(ctx->map_configuration,
210
+ &map_key, config, 0) < 0) {
211
+ return false;
212
+ }
213
+ return true;
214
+}
215
+
216
+static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
217
+ uint16_t *indirections_table,
218
+ size_t len)
219
+{
220
+ uint32_t i = 0;
221
+
222
+ if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
223
+ len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
224
+ return false;
225
+ }
226
+
227
+ for (; i < len; ++i) {
228
+ if (bpf_map_update_elem(ctx->map_indirections_table, &i,
229
+ indirections_table + i, 0) < 0) {
230
+ return false;
231
+ }
232
+ }
233
+ return true;
234
+}
235
+
236
+static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
237
+ uint8_t *toeplitz_key)
238
+{
239
+ uint32_t map_key = 0;
240
+
241
+ /* prepare toeplitz key */
242
+ uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
243
+
244
+ if (!ebpf_rss_is_loaded(ctx) || toeplitz_key == NULL) {
245
+ return false;
246
+ }
247
+ memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
248
+ *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
249
+
250
+ if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
251
+ 0) < 0) {
252
+ return false;
253
+ }
254
+ return true;
255
+}
256
+
257
+bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
258
+ uint16_t *indirections_table, uint8_t *toeplitz_key)
259
+{
260
+ if (!ebpf_rss_is_loaded(ctx) || config == NULL ||
261
+ indirections_table == NULL || toeplitz_key == NULL) {
262
+ return false;
263
+ }
264
+
265
+ if (!ebpf_rss_set_config(ctx, config)) {
266
+ return false;
267
+ }
268
+
269
+ if (!ebpf_rss_set_indirections_table(ctx, indirections_table,
270
+ config->indirections_len)) {
271
+ return false;
272
+ }
273
+
274
+ if (!ebpf_rss_set_toepliz_key(ctx, toeplitz_key)) {
275
+ return false;
276
+ }
277
+
278
+ return true;
279
+}
280
+
281
+void ebpf_rss_unload(struct EBPFRSSContext *ctx)
282
+{
283
+ if (!ebpf_rss_is_loaded(ctx)) {
284
+ return;
285
+ }
286
+
287
+ rss_bpf__destroy(ctx->obj);
288
+ ctx->obj = NULL;
289
+}
290
diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
291
new file mode 100644
292
index XXXXXXX..XXXXXXX
293
--- /dev/null
294
+++ b/ebpf/ebpf_rss.h
295
@@ -XXX,XX +XXX,XX @@
296
+/*
297
+ * eBPF RSS header
298
+ *
299
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
300
+ *
301
+ * Authors:
302
+ * Andrew Melnychenko <andrew@daynix.com>
303
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
304
+ *
305
+ * This work is licensed under the terms of the GNU GPL, version 2. See
306
+ * the COPYING file in the top-level directory.
307
+ */
308
+
309
+#ifndef QEMU_EBPF_RSS_H
310
+#define QEMU_EBPF_RSS_H
311
+
312
+struct EBPFRSSContext {
313
+ void *obj;
314
+ int program_fd;
315
+ int map_configuration;
316
+ int map_toeplitz_key;
317
+ int map_indirections_table;
318
+};
319
+
320
+struct EBPFRSSConfig {
321
+ uint8_t redirect;
322
+ uint8_t populate_hash;
323
+ uint32_t hash_types;
324
+ uint16_t indirections_len;
325
+ uint16_t default_queue;
326
+} __attribute__((packed));
327
+
328
+void ebpf_rss_init(struct EBPFRSSContext *ctx);
329
+
330
+bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx);
331
+
332
+bool ebpf_rss_load(struct EBPFRSSContext *ctx);
333
+
334
+bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
335
+ uint16_t *indirections_table, uint8_t *toeplitz_key);
336
+
337
+void ebpf_rss_unload(struct EBPFRSSContext *ctx);
338
+
339
+#endif /* QEMU_EBPF_RSS_H */
340
diff --git a/ebpf/meson.build b/ebpf/meson.build
341
new file mode 100644
342
index XXXXXXX..XXXXXXX
343
--- /dev/null
344
+++ b/ebpf/meson.build
345
@@ -0,0 +1 @@
346
+common_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: files('ebpf_rss-stub.c'))
347
diff --git a/ebpf/rss.bpf.skeleton.h b/ebpf/rss.bpf.skeleton.h
348
new file mode 100644
349
index XXXXXXX..XXXXXXX
350
--- /dev/null
351
+++ b/ebpf/rss.bpf.skeleton.h
352
@@ -XXX,XX +XXX,XX @@
353
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
354
+
355
+/* THIS FILE IS AUTOGENERATED! */
356
+#ifndef __RSS_BPF_SKEL_H__
357
+#define __RSS_BPF_SKEL_H__
358
+
359
+#include <stdlib.h>
360
+#include <bpf/libbpf.h>
361
+
362
+struct rss_bpf {
363
+    struct bpf_object_skeleton *skeleton;
364
+    struct bpf_object *obj;
365
+    struct {
366
+        struct bpf_map *tap_rss_map_configurations;
367
+        struct bpf_map *tap_rss_map_indirection_table;
368
+        struct bpf_map *tap_rss_map_toeplitz_key;
369
+    } maps;
370
+    struct {
371
+        struct bpf_program *tun_rss_steering_prog;
372
+    } progs;
373
+    struct {
374
+        struct bpf_link *tun_rss_steering_prog;
375
+    } links;
376
+};
377
+
378
+static void
379
+rss_bpf__destroy(struct rss_bpf *obj)
380
+{
381
+    if (!obj)
382
+        return;
383
+    if (obj->skeleton)
384
+        bpf_object__destroy_skeleton(obj->skeleton);
385
+    free(obj);
386
+}
387
+
388
+static inline int
389
+rss_bpf__create_skeleton(struct rss_bpf *obj);
390
+
391
+static inline struct rss_bpf *
392
+rss_bpf__open_opts(const struct bpf_object_open_opts *opts)
393
+{
394
+    struct rss_bpf *obj;
395
+
396
+    obj = (struct rss_bpf *)calloc(1, sizeof(*obj));
397
+    if (!obj)
398
+        return NULL;
399
+    if (rss_bpf__create_skeleton(obj))
400
+        goto err;
401
+    if (bpf_object__open_skeleton(obj->skeleton, opts))
402
+        goto err;
403
+
404
+    return obj;
405
+err:
406
+    rss_bpf__destroy(obj);
407
+    return NULL;
408
+}
409
+
410
+static inline struct rss_bpf *
411
+rss_bpf__open(void)
412
+{
413
+    return rss_bpf__open_opts(NULL);
414
+}
415
+
416
+static inline int
417
+rss_bpf__load(struct rss_bpf *obj)
418
+{
419
+    return bpf_object__load_skeleton(obj->skeleton);
420
+}
421
+
422
+static inline struct rss_bpf *
423
+rss_bpf__open_and_load(void)
424
+{
425
+    struct rss_bpf *obj;
426
+
427
+    obj = rss_bpf__open();
428
+    if (!obj)
429
+        return NULL;
430
+    if (rss_bpf__load(obj)) {
431
+        rss_bpf__destroy(obj);
432
+        return NULL;
433
+    }
434
+    return obj;
435
+}
436
+
437
+static inline int
438
+rss_bpf__attach(struct rss_bpf *obj)
439
+{
440
+    return bpf_object__attach_skeleton(obj->skeleton);
441
+}
442
+
443
+static inline void
444
+rss_bpf__detach(struct rss_bpf *obj)
445
+{
446
+    return bpf_object__detach_skeleton(obj->skeleton);
447
+}
448
+
449
+static inline int
450
+rss_bpf__create_skeleton(struct rss_bpf *obj)
451
+{
452
+    struct bpf_object_skeleton *s;
453
+
454
+    s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));
455
+    if (!s)
456
+        return -1;
457
+    obj->skeleton = s;
458
+
459
+    s->sz = sizeof(*s);
460
+    s->name = "rss_bpf";
461
+    s->obj = &obj->obj;
462
+
463
+    /* maps */
464
+    s->map_cnt = 3;
465
+    s->map_skel_sz = sizeof(*s->maps);
466
+    s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);
467
+    if (!s->maps)
468
+        goto err;
469
+
470
+    s->maps[0].name = "tap_rss_map_configurations";
471
+    s->maps[0].map = &obj->maps.tap_rss_map_configurations;
472
+
473
+    s->maps[1].name = "tap_rss_map_indirection_table";
474
+    s->maps[1].map = &obj->maps.tap_rss_map_indirection_table;
475
+
476
+    s->maps[2].name = "tap_rss_map_toeplitz_key";
477
+    s->maps[2].map = &obj->maps.tap_rss_map_toeplitz_key;
478
+
479
+    /* programs */
480
+    s->prog_cnt = 1;
481
+    s->prog_skel_sz = sizeof(*s->progs);
482
+    s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);
483
+    if (!s->progs)
484
+        goto err;
485
+
486
+    s->progs[0].name = "tun_rss_steering_prog";
487
+    s->progs[0].prog = &obj->progs.tun_rss_steering_prog;
488
+    s->progs[0].link = &obj->links.tun_rss_steering_prog;
489
+
490
+    s->data_sz = 8088;
491
+    s->data = (void *)"\
492
+\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\
493
+\0\0\0\0\0\0\0\0\0\0\0\x18\x1d\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0a\0\
494
+\x01\0\xbf\x18\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\x4c\xff\0\0\0\0\xbf\xa7\
495
+\0\0\0\0\0\0\x07\x07\0\0\x4c\xff\xff\xff\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
496
+\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x06\0\0\0\0\0\0\x18\x01\0\0\0\0\0\
497
+\0\0\0\0\0\0\0\0\0\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x07\0\0\0\0\0\0\
498
+\x18\0\0\0\xff\xff\xff\xff\0\0\0\0\0\0\0\0\x15\x06\x66\x02\0\0\0\0\xbf\x79\0\0\
499
+\0\0\0\0\x15\x09\x64\x02\0\0\0\0\x71\x61\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\
500
+\0\x5d\x02\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xc0\xff\0\0\0\0\x7b\x1a\xb8\xff\
501
+\0\0\0\0\x7b\x1a\xb0\xff\0\0\0\0\x7b\x1a\xa8\xff\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\
502
+\0\x63\x1a\x98\xff\0\0\0\0\x7b\x1a\x90\xff\0\0\0\0\x7b\x1a\x88\xff\0\0\0\0\x7b\
503
+\x1a\x80\xff\0\0\0\0\x7b\x1a\x78\xff\0\0\0\0\x7b\x1a\x70\xff\0\0\0\0\x7b\x1a\
504
+\x68\xff\0\0\0\0\x7b\x1a\x60\xff\0\0\0\0\x7b\x1a\x58\xff\0\0\0\0\x7b\x1a\x50\
505
+\xff\0\0\0\0\x15\x08\x4c\x02\0\0\0\0\x6b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\
506
+\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x02\0\0\x0c\0\0\0\xb7\
507
+\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\
508
+\x77\0\0\0\x20\0\0\0\x55\0\x11\0\0\0\0\0\xb7\x02\0\0\x10\0\0\0\x69\xa1\xd0\xff\
509
+\0\0\0\0\xbf\x13\0\0\0\0\0\0\xdc\x03\0\0\x10\0\0\0\x15\x03\x02\0\0\x81\0\0\x55\
510
+\x03\x0c\0\xa8\x88\0\0\xb7\x02\0\0\x14\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\
511
+\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\
512
+\x85\0\0\0\x44\0\0\0\x69\xa1\xd0\xff\0\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\
513
+\0\0\0\x15\0\x01\0\0\0\0\0\x05\0\x2f\x02\0\0\0\0\x15\x01\x2e\x02\0\0\0\0\x7b\
514
+\x9a\x30\xff\0\0\0\0\x15\x01\x57\0\x86\xdd\0\0\x55\x01\x3b\0\x08\0\0\0\x7b\x7a\
515
+\x20\xff\0\0\0\0\xb7\x07\0\0\x01\0\0\0\x73\x7a\x50\xff\0\0\0\0\xb7\x01\0\0\0\0\
516
+\0\0\x63\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\
517
+\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x02\0\
518
+\0\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\
519
+\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x1a\x02\0\0\0\0\x69\xa1\xd6\xff\0\0\
520
+\0\0\x55\x01\x01\0\0\0\0\0\xb7\x07\0\0\0\0\0\0\x61\xa1\xdc\xff\0\0\0\0\x63\x1a\
521
+\x5c\xff\0\0\0\0\x61\xa1\xe0\xff\0\0\0\0\x63\x1a\x60\xff\0\0\0\0\x73\x7a\x56\
522
+\xff\0\0\0\0\x71\xa9\xd9\xff\0\0\0\0\x71\xa1\xd0\xff\0\0\0\0\x67\x01\0\0\x02\0\
523
+\0\0\x57\x01\0\0\x3c\0\0\0\x7b\x1a\x40\xff\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\xbf\
524
+\x91\0\0\0\0\0\0\x57\x01\0\0\xff\0\0\0\x15\x01\x19\0\0\0\0\0\x71\xa1\x56\xff\0\
525
+\0\0\0\x55\x01\x17\0\0\0\0\0\x57\x09\0\0\xff\0\0\0\x15\x09\x7a\x01\x11\0\0\0\
526
+\x55\x09\x14\0\x06\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x53\xff\0\0\0\0\xb7\x01\
527
+\0\0\0\0\0\0\x63\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\
528
+\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\
529
+\xa2\x40\xff\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\
530
+\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\xf4\x01\0\0\0\0\x69\xa1\
531
+\xd0\xff\0\0\0\0\x6b\x1a\x58\xff\0\0\0\0\x69\xa1\xd2\xff\0\0\0\0\x6b\x1a\x5a\
532
+\xff\0\0\0\0\x71\xa1\x50\xff\0\0\0\0\x15\x01\xd4\0\0\0\0\0\x71\x62\x03\0\0\0\0\
533
+\0\x67\x02\0\0\x08\0\0\0\x71\x61\x02\0\0\0\0\0\x4f\x12\0\0\0\0\0\0\x71\x63\x04\
534
+\0\0\0\0\0\x71\x61\x05\0\0\0\0\0\x67\x01\0\0\x08\0\0\0\x4f\x31\0\0\0\0\0\0\x67\
535
+\x01\0\0\x10\0\0\0\x4f\x21\0\0\0\0\0\0\x71\xa2\x53\xff\0\0\0\0\x79\xa0\x30\xff\
536
+\0\0\0\0\x15\x02\x06\x01\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x02\0\0\0\x15\
537
+\x02\x03\x01\0\0\0\0\x61\xa1\x5c\xff\0\0\0\0\x63\x1a\xa0\xff\0\0\0\0\x61\xa1\
538
+\x60\xff\0\0\0\0\x63\x1a\xa4\xff\0\0\0\0\x69\xa1\x58\xff\0\0\0\0\x6b\x1a\xa8\
539
+\xff\0\0\0\0\x69\xa1\x5a\xff\0\0\0\0\x6b\x1a\xaa\xff\0\0\0\0\x05\0\x65\x01\0\0\
540
+\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x51\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\
541
+\xf0\xff\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\
542
+\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\
543
+\xff\xff\xb7\x01\0\0\x28\0\0\0\x7b\x1a\x40\xff\0\0\0\0\xbf\x81\0\0\0\0\0\0\xb7\
544
+\x02\0\0\0\0\0\0\xb7\x04\0\0\x28\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\
545
+\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x10\x01\0\0\0\0\x79\xa1\xe0\
546
+\xff\0\0\0\0\x63\x1a\x64\xff\0\0\0\0\x77\x01\0\0\x20\0\0\0\x63\x1a\x68\xff\0\0\
547
+\0\0\x79\xa1\xd8\xff\0\0\0\0\x63\x1a\x5c\xff\0\0\0\0\x77\x01\0\0\x20\0\0\0\x63\
548
+\x1a\x60\xff\0\0\0\0\x79\xa1\xe8\xff\0\0\0\0\x63\x1a\x6c\xff\0\0\0\0\x77\x01\0\
549
+\0\x20\0\0\0\x63\x1a\x70\xff\0\0\0\0\x79\xa1\xf0\xff\0\0\0\0\x63\x1a\x74\xff\0\
550
+\0\0\0\x77\x01\0\0\x20\0\0\0\x63\x1a\x78\xff\0\0\0\0\x71\xa9\xd6\xff\0\0\0\0\
551
+\x25\x09\xff\0\x3c\0\0\0\xb7\x01\0\0\x01\0\0\0\x6f\x91\0\0\0\0\0\0\x18\x02\0\0\
552
+\x01\0\0\0\0\0\0\0\0\x18\0\x1c\x5f\x21\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\0\
553
+\xf8\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x6b\x1a\xfe\xff\0\0\0\0\xb7\x01\0\0\x28\0\0\
554
+\0\x7b\x1a\x40\xff\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x8c\xff\xff\xff\x7b\
555
+\x1a\x18\xff\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x7c\xff\xff\xff\x7b\x1a\
556
+\x10\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\x28\xff\0\0\0\0\x7b\x7a\x20\xff\0\
557
+\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xfe\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\
558
+\xa2\x40\xff\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\
559
+\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x15\0\x01\0\0\0\0\0\x05\0\x90\
560
+\x01\0\0\0\0\xbf\x91\0\0\0\0\0\0\x15\x01\x23\0\x3c\0\0\0\x15\x01\x59\0\x2c\0\0\
561
+\0\x55\x01\x5a\0\x2b\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xf8\xff\0\0\0\0\xbf\xa3\
562
+\0\0\0\0\0\0\x07\x03\0\0\xf8\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\xa2\x40\xff\0\
563
+\0\0\0\xb7\x04\0\0\x04\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\
564
+\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\x03\x01\0\0\0\
565
+\0\x71\xa1\xfa\xff\0\0\0\0\x55\x01\x4b\0\x02\0\0\0\x71\xa1\xf9\xff\0\0\0\0\x55\
566
+\x01\x49\0\x02\0\0\0\x71\xa1\xfb\xff\0\0\0\0\x55\x01\x47\0\x01\0\0\0\x79\xa2\
567
+\x40\xff\0\0\0\0\x07\x02\0\0\x08\0\0\0\xbf\x81\0\0\0\0\0\0\x79\xa3\x18\xff\0\0\
568
+\0\0\xb7\x04\0\0\x10\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\0\
569
+\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\xf2\0\0\0\0\0\
570
+\xb7\x01\0\0\x01\0\0\0\x73\x1a\x55\xff\0\0\0\0\x05\0\x39\0\0\0\0\0\xb7\x01\0\0\
571
+\0\0\0\0\x6b\x1a\xf8\xff\0\0\0\0\xb7\x09\0\0\x02\0\0\0\xb7\x07\0\0\x1e\0\0\0\
572
+\x05\0\x0e\0\0\0\0\0\x79\xa2\x38\xff\0\0\0\0\x0f\x29\0\0\0\0\0\0\xbf\x92\0\0\0\
573
+\0\0\0\x07\x02\0\0\x01\0\0\0\x71\xa3\xff\xff\0\0\0\0\x67\x03\0\0\x03\0\0\0\x2d\
574
+\x23\x02\0\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\x05\0\x2b\0\0\0\0\0\x07\x07\0\0\xff\
575
+\xff\xff\xff\xbf\x72\0\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x77\x02\0\0\x20\0\0\0\
576
+\x15\x02\xf9\xff\0\0\0\0\x7b\x9a\x38\xff\0\0\0\0\x79\xa1\x40\xff\0\0\0\0\x0f\
577
+\x19\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xf8\xff\xff\xff\xbf\x81\0\0\0\
578
+\0\0\0\xbf\x92\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\
579
+\0\x44\0\0\0\xbf\x01\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\
580
+\x55\x01\x94\0\0\0\0\0\x71\xa2\xf8\xff\0\0\0\0\x55\x02\x0f\0\xc9\0\0\0\x07\x09\
581
+\0\0\x02\0\0\0\xbf\x81\0\0\0\0\0\0\xbf\x92\0\0\0\0\0\0\x79\xa3\x10\xff\0\0\0\0\
582
+\xb7\x04\0\0\x10\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\0\0\0\
583
+\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\x87\0\0\0\0\0\xb7\
584
+\x01\0\0\x01\0\0\0\x73\x1a\x54\xff\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\x05\0\x07\0\
585
+\0\0\0\0\xb7\x09\0\0\x01\0\0\0\x15\x02\xd1\xff\0\0\0\0\x71\xa9\xf9\xff\0\0\0\0\
586
+\x07\x09\0\0\x02\0\0\0\x05\0\xce\xff\0\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x56\
587
+\xff\0\0\0\0\x71\xa1\xff\xff\0\0\0\0\x67\x01\0\0\x03\0\0\0\x79\xa2\x40\xff\0\0\
588
+\0\0\x0f\x12\0\0\0\0\0\0\x07\x02\0\0\x08\0\0\0\x7b\x2a\x40\xff\0\0\0\0\x71\xa9\
589
+\xfe\xff\0\0\0\0\x25\x09\x0e\0\x3c\0\0\0\xb7\x01\0\0\x01\0\0\0\x6f\x91\0\0\0\0\
590
+\0\0\x18\x02\0\0\x01\0\0\0\0\0\0\0\0\x18\0\x1c\x5f\x21\0\0\0\0\0\0\x55\x01\x01\
591
+\0\0\0\0\0\x05\0\x07\0\0\0\0\0\x79\xa1\x28\xff\0\0\0\0\x07\x01\0\0\x01\0\0\0\
592
+\x7b\x1a\x28\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\
593
+\x82\xff\x0b\0\0\0\x05\0\x10\xff\0\0\0\0\x15\x09\xf8\xff\x87\0\0\0\x05\0\xfd\
594
+\xff\0\0\0\0\x71\xa1\x51\xff\0\0\0\0\x79\xa0\x30\xff\0\0\0\0\x15\x01\x17\x01\0\
595
+\0\0\0\x71\x62\x03\0\0\0\0\0\x67\x02\0\0\x08\0\0\0\x71\x61\x02\0\0\0\0\0\x4f\
596
+\x12\0\0\0\0\0\0\x71\x63\x04\0\0\0\0\0\x71\x61\x05\0\0\0\0\0\x67\x01\0\0\x08\0\
597
+\0\0\x4f\x31\0\0\0\0\0\0\x67\x01\0\0\x10\0\0\0\x4f\x21\0\0\0\0\0\0\x71\xa2\x53\
598
+\xff\0\0\0\0\x15\x02\x3d\0\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x10\0\0\0\
599
+\x15\x02\x3a\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x5c\xff\xff\xff\x71\xa4\
600
+\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\
601
+\x07\x03\0\0\x7c\xff\xff\xff\x67\x01\0\0\x38\0\0\0\xc7\x01\0\0\x38\0\0\0\x65\
602
+\x01\x01\0\xff\xff\xff\xff\xbf\x32\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\
603
+\x6c\xff\xff\xff\x71\xa5\x55\xff\0\0\0\0\xbf\x34\0\0\0\0\0\0\x15\x05\x02\0\0\0\
604
+\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\x8c\xff\xff\xff\x65\x01\x01\0\xff\xff\xff\
605
+\xff\xbf\x43\0\0\0\0\0\0\x61\x21\x04\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\x24\0\
606
+\0\0\0\0\0\x4f\x41\0\0\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\0\x61\x21\x08\0\0\0\0\0\
607
+\x61\x22\x0c\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x12\0\0\0\0\0\0\x7b\x2a\xa8\
608
+\xff\0\0\0\0\x61\x31\0\0\0\0\0\0\x61\x32\x04\0\0\0\0\0\x61\x34\x08\0\0\0\0\0\
609
+\x61\x33\x0c\0\0\0\0\0\x69\xa5\x5a\xff\0\0\0\0\x6b\x5a\xc2\xff\0\0\0\0\x69\xa5\
610
+\x58\xff\0\0\0\0\x6b\x5a\xc0\xff\0\0\0\0\x67\x03\0\0\x20\0\0\0\x4f\x43\0\0\0\0\
611
+\0\0\x7b\x3a\xb8\xff\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x12\0\0\0\0\0\0\x7b\x2a\
612
+\xb0\xff\0\0\0\0\x05\0\x6b\0\0\0\0\0\x71\xa2\x52\xff\0\0\0\0\x15\x02\x04\0\0\0\
613
+\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x04\0\0\0\x15\x02\x01\0\0\0\0\0\x05\0\xf7\
614
+\xfe\0\0\0\0\x57\x01\0\0\x01\0\0\0\x15\x01\xd3\0\0\0\0\0\x61\xa1\x5c\xff\0\0\0\
615
+\0\x63\x1a\xa0\xff\0\0\0\0\x61\xa1\x60\xff\0\0\0\0\x63\x1a\xa4\xff\0\0\0\0\x05\
616
+\0\x5e\0\0\0\0\0\x71\xa2\x52\xff\0\0\0\0\x15\x02\x1e\0\0\0\0\0\xbf\x12\0\0\0\0\
617
+\0\0\x57\x02\0\0\x20\0\0\0\x15\x02\x1b\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\
618
+\0\x5c\xff\xff\xff\x71\xa4\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\
619
+\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\x7c\xff\xff\xff\x57\x01\0\0\0\x01\0\0\
620
+\x15\x01\x01\0\0\0\0\0\xbf\x32\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\x6c\
621
+\xff\xff\xff\x71\xa5\x55\xff\0\0\0\0\xbf\x34\0\0\0\0\0\0\x15\x05\x02\0\0\0\0\0\
622
+\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\x8c\xff\xff\xff\x15\x01\xc3\xff\0\0\0\0\x05\0\
623
+\xc1\xff\0\0\0\0\xb7\x09\0\0\x3c\0\0\0\x79\xa7\x20\xff\0\0\0\0\x67\0\0\0\x20\0\
624
+\0\0\x77\0\0\0\x20\0\0\0\x15\0\xa5\xfe\0\0\0\0\x05\0\xb0\0\0\0\0\0\x15\x09\x07\
625
+\xff\x87\0\0\0\x05\0\xa2\xfe\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x08\0\0\0\
626
+\x15\x02\xab\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x5c\xff\xff\xff\x71\xa4\
627
+\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\
628
+\x07\x03\0\0\x7c\xff\xff\xff\x57\x01\0\0\x40\0\0\0\x15\x01\x01\0\0\0\0\0\xbf\
629
+\x32\0\0\0\0\0\0\x61\x23\x04\0\0\0\0\0\x67\x03\0\0\x20\0\0\0\x61\x24\0\0\0\0\0\
630
+\0\x4f\x43\0\0\0\0\0\0\x7b\x3a\xa0\xff\0\0\0\0\x61\x23\x08\0\0\0\0\0\x61\x22\
631
+\x0c\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x32\0\0\0\0\0\0\x7b\x2a\xa8\xff\0\0\0\
632
+\0\x15\x01\x1c\0\0\0\0\0\x71\xa1\x55\xff\0\0\0\0\x15\x01\x1a\0\0\0\0\0\x61\xa1\
633
+\x98\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\x94\xff\0\0\0\0\x4f\x21\0\0\0\0\
634
+\0\0\x7b\x1a\xb8\xff\0\0\0\0\x61\xa1\x90\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\
635
+\xa2\x8c\xff\0\0\0\0\x05\0\x19\0\0\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x52\xff\
636
+\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\
637
+\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\xa2\x40\xff\0\0\0\0\xb7\x04\0\
638
+\0\x08\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\x77\
639
+\0\0\0\x20\0\0\0\x55\0\x7d\0\0\0\0\0\x05\0\x88\xfe\0\0\0\0\xb7\x09\0\0\x2b\0\0\
640
+\0\x05\0\xc6\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\
641
+\x74\xff\0\0\0\0\x4f\x21\0\0\0\0\0\0\x7b\x1a\xb8\xff\0\0\0\0\x61\xa1\x70\xff\0\
642
+\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\x6c\xff\0\0\0\0\x4f\x21\0\0\0\0\0\0\x7b\
643
+\x1a\xb0\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x07\x07\0\0\x04\0\0\0\x61\x03\0\0\0\0\
644
+\0\0\xb7\x05\0\0\0\0\0\0\x05\0\x4e\0\0\0\0\0\xaf\x52\0\0\0\0\0\0\xbf\x75\0\0\0\
645
+\0\0\0\x0f\x15\0\0\0\0\0\0\x71\x55\0\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\
646
+\0\0\0\0\0\x77\0\0\0\x07\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\
647
+\0\x39\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\
648
+\x50\0\0\0\0\0\0\x77\0\0\0\x06\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\
649
+\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3a\0\0\0\xc7\0\0\0\x3f\0\0\
650
+\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\0\0\
651
+\0\0\0\x77\0\0\0\x05\0\0\0\x57\0\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\
652
+\0\0\0\0\x67\0\0\0\x3b\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\
653
+\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\0\0\0\0\0\x77\0\0\0\x04\0\0\0\x57\0\
654
+\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3c\0\0\0\xc7\
655
+\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\x50\0\0\0\0\0\0\
656
+\x77\0\0\0\x03\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x03\0\0\0\0\
657
+\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3d\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\
658
+\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\x50\0\0\0\0\0\0\x77\0\0\0\x02\0\0\0\x57\0\0\0\
659
+\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\
660
+\0\0\x3e\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\
661
+\x50\0\0\0\0\0\0\x77\0\0\0\x01\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\
662
+\x4f\x03\0\0\0\0\0\0\x57\x04\0\0\x01\0\0\0\x87\x04\0\0\0\0\0\0\x5f\x34\0\0\0\0\
663
+\0\0\xaf\x42\0\0\0\0\0\0\x57\x05\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x53\0\
664
+\0\0\0\0\0\x07\x01\0\0\x01\0\0\0\xbf\x25\0\0\0\0\0\0\x15\x01\x0b\0\x24\0\0\0\
665
+\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\xa0\xff\xff\xff\x0f\x12\0\0\0\0\0\0\x71\x24\0\
666
+\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x38\0\0\0\xc7\0\0\0\x38\0\0\0\xb7\x02\
667
+\0\0\0\0\0\0\x65\0\xa9\xff\xff\xff\xff\xff\xbf\x32\0\0\0\0\0\0\x05\0\xa7\xff\0\
668
+\0\0\0\xbf\x21\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x15\x01\
669
+\x0e\0\0\0\0\0\x71\x63\x06\0\0\0\0\0\x71\x64\x07\0\0\0\0\0\x67\x04\0\0\x08\0\0\
670
+\0\x4f\x34\0\0\0\0\0\0\x3f\x41\0\0\0\0\0\0\x2f\x41\0\0\0\0\0\0\x1f\x12\0\0\0\0\
671
+\0\0\x63\x2a\x50\xff\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x50\xff\xff\xff\
672
+\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\x55\0\x05\0\0\0\0\0\
673
+\x71\x61\x08\0\0\0\0\0\x71\x60\x09\0\0\0\0\0\x67\0\0\0\x08\0\0\0\x4f\x10\0\0\0\
674
+\0\0\0\x95\0\0\0\0\0\0\0\x69\0\0\0\0\0\0\0\x05\0\xfd\xff\0\0\0\0\x02\0\0\0\x04\
675
+\0\0\0\x0a\0\0\0\x01\0\0\0\0\0\0\0\x02\0\0\0\x04\0\0\0\x28\0\0\0\x01\0\0\0\0\0\
676
+\0\0\x02\0\0\0\x04\0\0\0\x02\0\0\0\x80\0\0\0\0\0\0\0\x47\x50\x4c\x20\x76\x32\0\
677
+\0\0\0\0\0\x10\0\0\0\0\0\0\0\x01\x7a\x52\0\x08\x7c\x0b\x01\x0c\0\0\0\x18\0\0\0\
678
+\x18\0\0\0\0\0\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
679
+\0\0\0\0\0\0\0\0\0\0\0\0\xa0\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
680
+\0\x60\x02\0\0\0\0\x03\0\x20\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3f\x02\0\0\0\0\
681
+\x03\0\xd0\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xed\x01\0\0\0\0\x03\0\x10\x10\0\0\0\
682
+\0\0\0\0\0\0\0\0\0\0\0\xd4\x01\0\0\0\0\x03\0\x20\x10\0\0\0\0\0\0\0\0\0\0\0\0\0\
683
+\0\xa3\x01\0\0\0\0\x03\0\xb8\x12\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x01\0\0\0\0\
684
+\x03\0\x48\x10\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2a\x01\0\0\0\0\x03\0\x10\x13\0\0\0\
685
+\0\0\0\0\0\0\0\0\0\0\0\xe1\0\0\0\0\0\x03\0\xa0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
686
+\x2e\x02\0\0\0\0\x03\0\x28\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x68\x02\0\0\0\0\x03\
687
+\0\xc0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x36\x02\0\0\0\0\x03\0\xc8\x13\0\0\0\0\0\
688
+\0\0\0\0\0\0\0\0\0\x22\x01\0\0\0\0\x03\0\xe8\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
689
+\x02\x01\0\0\0\0\x03\0\x40\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd9\0\0\0\0\0\x03\0\
690
+\xf8\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x26\x02\0\0\0\0\x03\0\x20\x0e\0\0\0\0\0\0\
691
+\0\0\0\0\0\0\0\0\xcc\x01\0\0\0\0\x03\0\x60\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9b\
692
+\x01\0\0\0\0\x03\0\xc8\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x5b\x01\0\0\0\0\x03\0\
693
+\x20\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x7c\x01\0\0\0\0\x03\0\x48\x08\0\0\0\0\0\0\
694
+\0\0\0\0\0\0\0\0\x53\x01\0\0\0\0\x03\0\xb8\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1a\
695
+\x01\0\0\0\0\x03\0\xe0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x84\x01\0\0\0\0\x03\0\
696
+\xb8\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1e\x02\0\0\0\0\x03\0\xd8\x09\0\0\0\0\0\0\0\
697
+\0\0\0\0\0\0\0\xc4\x01\0\0\0\0\x03\0\x70\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x93\
698
+\x01\0\0\0\0\x03\0\xa8\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x74\x01\0\0\0\0\x03\0\
699
+\xf0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x4b\x01\0\0\0\0\x03\0\0\x0a\0\0\0\0\0\0\0\
700
+\0\0\0\0\0\0\0\x12\x01\0\0\0\0\x03\0\x10\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xfa\0\
701
+\0\0\0\0\x03\0\xc0\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x58\x02\0\0\0\0\x03\0\x88\
702
+\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x16\x02\0\0\0\0\x03\0\xb8\x0a\0\0\0\0\0\0\0\0\
703
+\0\0\0\0\0\0\xe5\x01\0\0\0\0\x03\0\xc0\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xbc\x01\
704
+\0\0\0\0\x03\0\0\x0e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x8b\x01\0\0\0\0\x03\0\x18\x0e\
705
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd1\0\0\0\0\0\x03\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\
706
+\0\0\x50\x02\0\0\0\0\x03\0\x20\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0e\x02\0\0\0\0\
707
+\x03\0\x48\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x6c\x01\0\0\0\0\x03\0\xb0\x04\0\0\0\
708
+\0\0\0\0\0\0\0\0\0\0\0\x43\x01\0\0\0\0\x03\0\xc8\x0c\0\0\0\0\0\0\0\0\0\0\0\0\0\
709
+\0\xc9\0\0\0\0\0\x03\0\xf8\x0c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x06\x02\0\0\0\0\x03\
710
+\0\xd0\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3b\x01\0\0\0\0\x03\0\x98\x0b\0\0\0\0\0\
711
+\0\0\0\0\0\0\0\0\0\xf2\0\0\0\0\0\x03\0\xb8\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\
712
+\x02\0\0\0\0\x03\0\xf0\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xfe\x01\0\0\0\0\x03\0\
713
+\xf8\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xdd\x01\0\0\0\0\x03\0\0\x0c\0\0\0\0\0\0\0\
714
+\0\0\0\0\0\0\0\xb4\x01\0\0\0\0\x03\0\x30\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0a\
715
+\x01\0\0\0\0\x03\0\x90\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc1\0\0\0\0\0\x03\0\xa8\
716
+\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xba\0\0\0\0\0\x03\0\xd0\x01\0\0\0\0\0\0\0\0\0\
717
+\0\0\0\0\0\xf6\x01\0\0\0\0\x03\0\xe0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xac\x01\0\
718
+\0\0\0\x03\0\x30\x0e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x33\x01\0\0\0\0\x03\0\x80\x0e\
719
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xea\0\0\0\0\0\x03\0\x98\x0e\0\0\0\0\0\0\0\0\0\0\0\
720
+\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x6b\0\0\0\x11\0\x06\
721
+\0\0\0\0\0\0\0\0\0\x07\0\0\0\0\0\0\0\x25\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\x14\
722
+\0\0\0\0\0\0\0\x82\0\0\0\x11\0\x05\0\x28\0\0\0\0\0\0\0\x14\0\0\0\0\0\0\0\x01\0\
723
+\0\0\x11\0\x05\0\x14\0\0\0\0\0\0\0\x14\0\0\0\0\0\0\0\x40\0\0\0\x12\0\x03\0\0\0\
724
+\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\x28\0\0\0\0\0\0\0\x01\0\0\0\x3a\0\0\0\x50\0\0\
725
+\0\0\0\0\0\x01\0\0\0\x3c\0\0\0\x80\x13\0\0\0\0\0\0\x01\0\0\0\x3b\0\0\0\x1c\0\0\
726
+\0\0\0\0\0\x01\0\0\0\x38\0\0\0\0\x74\x61\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\
727
+\x5f\x74\x6f\x65\x70\x6c\x69\x74\x7a\x5f\x6b\x65\x79\0\x2e\x74\x65\x78\x74\0\
728
+\x6d\x61\x70\x73\0\x74\x61\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\x5f\x63\x6f\x6e\
729
+\x66\x69\x67\x75\x72\x61\x74\x69\x6f\x6e\x73\0\x74\x75\x6e\x5f\x72\x73\x73\x5f\
730
+\x73\x74\x65\x65\x72\x69\x6e\x67\x5f\x70\x72\x6f\x67\0\x2e\x72\x65\x6c\x74\x75\
731
+\x6e\x5f\x72\x73\x73\x5f\x73\x74\x65\x65\x72\x69\x6e\x67\0\x5f\x6c\x69\x63\x65\
732
+\x6e\x73\x65\0\x2e\x72\x65\x6c\x2e\x65\x68\x5f\x66\x72\x61\x6d\x65\0\x74\x61\
733
+\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\x5f\x69\x6e\x64\x69\x72\x65\x63\x74\x69\
734
+\x6f\x6e\x5f\x74\x61\x62\x6c\x65\0\x72\x73\x73\x2e\x62\x70\x66\x2e\x63\0\x2e\
735
+\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x4c\x42\x42\x30\x5f\
736
+\x39\0\x4c\x42\x42\x30\x5f\x38\x39\0\x4c\x42\x42\x30\x5f\x36\x39\0\x4c\x42\x42\
737
+\x30\x5f\x35\x39\0\x4c\x42\x42\x30\x5f\x31\x39\0\x4c\x42\x42\x30\x5f\x31\x30\
738
+\x39\0\x4c\x42\x42\x30\x5f\x39\x38\0\x4c\x42\x42\x30\x5f\x37\x38\0\x4c\x42\x42\
739
+\x30\x5f\x34\x38\0\x4c\x42\x42\x30\x5f\x31\x38\0\x4c\x42\x42\x30\x5f\x38\x37\0\
740
+\x4c\x42\x42\x30\x5f\x34\x37\0\x4c\x42\x42\x30\x5f\x33\x37\0\x4c\x42\x42\x30\
741
+\x5f\x31\x37\0\x4c\x42\x42\x30\x5f\x31\x30\x37\0\x4c\x42\x42\x30\x5f\x39\x36\0\
742
+\x4c\x42\x42\x30\x5f\x37\x36\0\x4c\x42\x42\x30\x5f\x36\x36\0\x4c\x42\x42\x30\
743
+\x5f\x34\x36\0\x4c\x42\x42\x30\x5f\x33\x36\0\x4c\x42\x42\x30\x5f\x32\x36\0\x4c\
744
+\x42\x42\x30\x5f\x31\x30\x36\0\x4c\x42\x42\x30\x5f\x36\x35\0\x4c\x42\x42\x30\
745
+\x5f\x34\x35\0\x4c\x42\x42\x30\x5f\x33\x35\0\x4c\x42\x42\x30\x5f\x34\0\x4c\x42\
746
+\x42\x30\x5f\x35\x34\0\x4c\x42\x42\x30\x5f\x34\x34\0\x4c\x42\x42\x30\x5f\x32\
747
+\x34\0\x4c\x42\x42\x30\x5f\x31\x30\x34\0\x4c\x42\x42\x30\x5f\x39\x33\0\x4c\x42\
748
+\x42\x30\x5f\x38\x33\0\x4c\x42\x42\x30\x5f\x35\x33\0\x4c\x42\x42\x30\x5f\x34\
749
+\x33\0\x4c\x42\x42\x30\x5f\x32\x33\0\x4c\x42\x42\x30\x5f\x31\x30\x33\0\x4c\x42\
750
+\x42\x30\x5f\x38\x32\0\x4c\x42\x42\x30\x5f\x35\x32\0\x4c\x42\x42\x30\x5f\x31\
751
+\x30\x32\0\x4c\x42\x42\x30\x5f\x39\x31\0\x4c\x42\x42\x30\x5f\x38\x31\0\x4c\x42\
752
+\x42\x30\x5f\x37\x31\0\x4c\x42\x42\x30\x5f\x36\x31\0\x4c\x42\x42\x30\x5f\x35\
753
+\x31\0\x4c\x42\x42\x30\x5f\x34\x31\0\x4c\x42\x42\x30\x5f\x32\x31\0\x4c\x42\x42\
754
+\x30\x5f\x31\x31\0\x4c\x42\x42\x30\x5f\x31\x31\x31\0\x4c\x42\x42\x30\x5f\x31\
755
+\x30\x31\0\x4c\x42\x42\x30\x5f\x38\x30\0\x4c\x42\x42\x30\x5f\x36\x30\0\x4c\x42\
756
+\x42\x30\x5f\x35\x30\0\x4c\x42\x42\x30\x5f\x31\x30\0\x4c\x42\x42\x30\x5f\x31\
757
+\x31\x30\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
758
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\
759
+\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa0\x1a\0\0\0\0\0\0\x71\x02\0\
760
+\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1a\0\0\0\x01\0\0\
761
+\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
762
+\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x5a\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\
763
+\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\
764
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x56\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
765
+\0\x60\x1a\0\0\0\0\0\0\x30\0\0\0\0\0\0\0\x09\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\
766
+\x10\0\0\0\0\0\0\0\x20\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x18\
767
+\x14\0\0\0\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\
768
+\0\0\0\x6c\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x54\x14\0\0\0\0\0\
769
+\0\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x78\0\0\
770
+\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x60\x14\0\0\0\0\0\0\x30\0\0\0\0\
771
+\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x74\0\0\0\x09\0\0\0\0\
772
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x90\x1a\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x09\0\0\0\
773
+\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\xb2\0\0\0\x02\0\0\0\0\0\0\0\0\0\
774
+\0\0\0\0\0\0\0\0\0\0\x90\x14\0\0\0\0\0\0\xd0\x05\0\0\0\0\0\0\x01\0\0\0\x39\0\0\
775
+\0\x08\0\0\0\0\0\0\0\x18\0\0\0\0\0\0\0";
776
+
777
+    return 0;
778
+err:
779
+    bpf_object__destroy_skeleton(s);
780
+    return -1;
781
+}
782
+
783
+#endif /* __RSS_BPF_SKEL_H__ */
784
diff --git a/ebpf/trace-events b/ebpf/trace-events
785
new file mode 100644
786
index XXXXXXX..XXXXXXX
787
--- /dev/null
788
+++ b/ebpf/trace-events
789
@@ -XXX,XX +XXX,XX @@
790
+# See docs/devel/tracing.txt for syntax documentation.
791
+
792
+# ebpf_rss.c
793
+ebpf_error(const char *s1, const char *s2) "error in %s: %s"
794
diff --git a/ebpf/trace.h b/ebpf/trace.h
795
new file mode 100644
796
index XXXXXXX..XXXXXXX
797
--- /dev/null
798
+++ b/ebpf/trace.h
799
@@ -0,0 +1 @@
800
+#include "trace/trace-ebpf.h"
801
diff --git a/meson.build b/meson.build
802
index XXXXXXX..XXXXXXX 100644
803
--- a/meson.build
804
+++ b/meson.build
805
@@ -XXX,XX +XXX,XX @@ if not get_option('fuse_lseek').disabled()
806
endif
807
endif
808
809
+# libbpf
810
+libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
811
+if libbpf.found() and not cc.links('''
812
+ #include <bpf/libbpf.h>
813
+ int main(void)
814
+ {
815
+ bpf_object__destroy_skeleton(NULL);
816
+ return 0;
817
+ }''', dependencies: libbpf)
818
+ libbpf = not_found
819
+ if get_option('bpf').enabled()
820
+ error('libbpf skeleton test failed')
821
+ else
822
+ warning('libbpf skeleton test failed, disabling')
823
+ endif
824
+endif
825
+
826
if get_option('cfi')
827
cfi_flags=[]
828
# Check for dependency on LTO
829
@@ -XXX,XX +XXX,XX @@ endif
830
config_host_data.set('CONFIG_GTK', gtk.found())
831
config_host_data.set('CONFIG_LIBATTR', have_old_libattr)
832
config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found())
833
+config_host_data.set('CONFIG_EBPF', libbpf.found())
834
config_host_data.set('CONFIG_LIBISCSI', libiscsi.found())
835
config_host_data.set('CONFIG_LIBNFS', libnfs.found())
836
config_host_data.set('CONFIG_RBD', rbd.found())
837
@@ -XXX,XX +XXX,XX @@ if have_system
838
'backends',
839
'backends/tpm',
840
'chardev',
841
+ 'ebpf',
842
'hw/9pfs',
843
'hw/acpi',
844
'hw/adc',
845
@@ -XXX,XX +XXX,XX @@ subdir('accel')
846
subdir('plugins')
847
subdir('bsd-user')
848
subdir('linux-user')
849
+subdir('ebpf')
850
+
851
+common_ss.add(libbpf)
852
853
bsd_user_ss.add(files('gdbstub.c'))
854
specific_ss.add_all(when: 'CONFIG_BSD_USER', if_true: bsd_user_ss)
855
@@ -XXX,XX +XXX,XX @@ summary_info += {'RDMA support': config_host.has_key('CONFIG_RDMA')}
856
summary_info += {'PVRDMA support': config_host.has_key('CONFIG_PVRDMA')}
857
summary_info += {'fdt support': fdt_opt == 'disabled' ? false : fdt_opt}
858
summary_info += {'libcap-ng support': libcap_ng.found()}
859
+summary_info += {'bpf support': libbpf.found()}
860
# TODO: add back protocol and server version
861
summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')}
862
summary_info += {'rbd support': rbd.found()}
863
diff --git a/meson_options.txt b/meson_options.txt
864
index XXXXXXX..XXXXXXX 100644
865
--- a/meson_options.txt
866
+++ b/meson_options.txt
867
@@ -XXX,XX +XXX,XX @@ option('bzip2', type : 'feature', value : 'auto',
868
description: 'bzip2 support for DMG images')
869
option('cap_ng', type : 'feature', value : 'auto',
870
description: 'cap_ng support')
871
+option('bpf', type : 'feature', value : 'auto',
872
+ description: 'eBPF support')
873
option('cocoa', type : 'feature', value : 'auto',
874
description: 'Cocoa user interface (macOS only)')
875
option('curl', type : 'feature', value : 'auto',
223
--
876
--
224
2.17.1
877
2.7.4
225
878
226
879
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
It's a good idea to use a notifier to notify the COLO frame of
4
inconsistent packet comparisons.
5
6
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
7
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
net/colo-compare.c | 37 ++++++++++++++++++++++++++-----------
12
net/colo-compare.h | 2 ++
13
2 files changed, 28 insertions(+), 11 deletions(-)
14
15
diff --git a/net/colo-compare.c b/net/colo-compare.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/net/colo-compare.c
18
+++ b/net/colo-compare.c
19
@@ -XXX,XX +XXX,XX @@
20
#include "sysemu/iothread.h"
21
#include "net/colo-compare.h"
22
#include "migration/colo.h"
23
+#include "migration/migration.h"
24
25
#define TYPE_COLO_COMPARE "colo-compare"
26
#define COLO_COMPARE(obj) \
27
@@ -XXX,XX +XXX,XX @@
28
static QTAILQ_HEAD(, CompareState) net_compares =
29
QTAILQ_HEAD_INITIALIZER(net_compares);
30
31
+static NotifierList colo_compare_notifiers =
32
+ NOTIFIER_LIST_INITIALIZER(colo_compare_notifiers);
33
+
34
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
35
#define MAX_QUEUE_SIZE 1024
36
37
@@ -XXX,XX +XXX,XX @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
38
return false;
39
}
40
41
+static void colo_compare_inconsistency_notify(void)
42
+{
43
+ notifier_list_notify(&colo_compare_notifiers,
44
+ migrate_get_current());
45
+}
46
+
47
static void colo_compare_tcp(CompareState *s, Connection *conn)
48
{
49
Packet *ppkt = NULL, *spkt = NULL;
50
@@ -XXX,XX +XXX,XX @@ sec:
51
qemu_hexdump((char *)spkt->data, stderr,
52
"colo-compare spkt", spkt->size);
53
54
- /*
55
- * colo_compare_inconsistent_notify();
56
- * TODO: notice to checkpoint();
57
- */
58
+ colo_compare_inconsistency_notify();
59
}
60
}
61
62
@@ -XXX,XX +XXX,XX @@ static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
63
}
64
}
65
66
+void colo_compare_register_notifier(Notifier *notify)
67
+{
68
+ notifier_list_add(&colo_compare_notifiers, notify);
69
+}
70
+
71
+void colo_compare_unregister_notifier(Notifier *notify)
72
+{
73
+ notifier_remove(notify);
74
+}
75
+
76
static int colo_old_packet_check_one_conn(Connection *conn,
77
- void *user_data)
78
+ void *user_data)
79
{
80
GList *result = NULL;
81
int64_t check_time = REGULAR_PACKET_CHECK_MS;
82
@@ -XXX,XX +XXX,XX @@ static int colo_old_packet_check_one_conn(Connection *conn,
83
84
if (result) {
85
/* Do checkpoint will flush old packet */
86
- /*
87
- * TODO: Notify colo frame to do checkpoint.
88
- * colo_compare_inconsistent_notify();
89
- */
90
+ colo_compare_inconsistency_notify();
91
return 0;
92
}
93
94
@@ -XXX,XX +XXX,XX @@ static void colo_compare_packet(CompareState *s, Connection *conn,
95
/*
96
* If one packet arrive late, the secondary_list or
97
* primary_list will be empty, so we can't compare it
98
- * until next comparison.
99
+ * until next comparison. If the packets in the list are
100
+ * timeout, it will trigger a checkpoint request.
101
*/
102
trace_colo_compare_main("packet different");
103
g_queue_push_head(&conn->primary_list, pkt);
104
- /* TODO: colo_notify_checkpoint();*/
105
+ colo_compare_inconsistency_notify();
106
break;
107
}
108
}
109
diff --git a/net/colo-compare.h b/net/colo-compare.h
110
index XXXXXXX..XXXXXXX 100644
111
--- a/net/colo-compare.h
112
+++ b/net/colo-compare.h
113
@@ -XXX,XX +XXX,XX @@
114
#define QEMU_COLO_COMPARE_H
115
116
void colo_notify_compares_event(void *opaque, int event, Error **errp);
117
+void colo_compare_register_notifier(Notifier *notify);
118
+void colo_compare_unregister_notifier(Notifier *notify);
119
120
#endif /* QEMU_COLO_COMPARE_H */
121
--
122
2.17.1
123
124
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
Make sure the master starts block replication after the slave's block
4
replication started.
5
6
Besides, we need to activate the VM's blocks before going into
7
COLO state.
8
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
11
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
12
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
migration/colo.c | 43 +++++++++++++++++++++++++++++++++++++++++++
16
migration/migration.c | 10 ++++++++++
17
2 files changed, 53 insertions(+)
18
19
diff --git a/migration/colo.c b/migration/colo.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/migration/colo.c
22
+++ b/migration/colo.c
23
@@ -XXX,XX +XXX,XX @@
24
#include "replication.h"
25
#include "net/colo-compare.h"
26
#include "net/colo.h"
27
+#include "block/block.h"
28
29
static bool vmstate_loading;
30
static Notifier packets_compare_notifier;
31
@@ -XXX,XX +XXX,XX @@ static void secondary_vm_do_failover(void)
32
{
33
int old_state;
34
MigrationIncomingState *mis = migration_incoming_get_current();
35
+ Error *local_err = NULL;
36
37
/* Can not do failover during the process of VM's loading VMstate, Or
38
* it will break the secondary VM.
39
@@ -XXX,XX +XXX,XX @@ static void secondary_vm_do_failover(void)
40
migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
41
MIGRATION_STATUS_COMPLETED);
42
43
+ replication_stop_all(true, &local_err);
44
+ if (local_err) {
45
+ error_report_err(local_err);
46
+ }
47
+
48
if (!autostart) {
49
error_report("\"-S\" qemu option will be ignored in secondary side");
50
/* recover runstate to normal migration finish state */
51
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
52
{
53
MigrationState *s = migrate_get_current();
54
int old_state;
55
+ Error *local_err = NULL;
56
57
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
58
MIGRATION_STATUS_COMPLETED);
59
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
60
FailoverStatus_str(old_state));
61
return;
62
}
63
+
64
+ replication_stop_all(true, &local_err);
65
+ if (local_err) {
66
+ error_report_err(local_err);
67
+ local_err = NULL;
68
+ }
69
+
70
/* Notify COLO thread that failover work is finished */
71
qemu_sem_post(&s->colo_exit_sem);
72
}
73
@@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s,
74
qemu_savevm_state_header(fb);
75
qemu_savevm_state_setup(fb);
76
qemu_mutex_lock_iothread();
77
+ replication_do_checkpoint_all(&local_err);
78
+ if (local_err) {
79
+ qemu_mutex_unlock_iothread();
80
+ goto out;
81
+ }
82
qemu_savevm_state_complete_precopy(fb, false, false);
83
qemu_mutex_unlock_iothread();
84
85
@@ -XXX,XX +XXX,XX @@ static void colo_process_checkpoint(MigrationState *s)
86
object_unref(OBJECT(bioc));
87
88
qemu_mutex_lock_iothread();
89
+ replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
90
+ if (local_err) {
91
+ qemu_mutex_unlock_iothread();
92
+ goto out;
93
+ }
94
+
95
vm_start();
96
qemu_mutex_unlock_iothread();
97
trace_colo_vm_state_change("stop", "run");
98
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
99
object_unref(OBJECT(bioc));
100
101
qemu_mutex_lock_iothread();
102
+ replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
103
+ if (local_err) {
104
+ qemu_mutex_unlock_iothread();
105
+ goto out;
106
+ }
107
vm_start();
108
trace_colo_vm_state_change("stop", "run");
109
qemu_mutex_unlock_iothread();
110
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
111
goto out;
112
}
113
114
+ replication_get_error_all(&local_err);
115
+ if (local_err) {
116
+ qemu_mutex_unlock_iothread();
117
+ goto out;
118
+ }
119
+ /* discard colo disk buffer */
120
+ replication_do_checkpoint_all(&local_err);
121
+ if (local_err) {
122
+ qemu_mutex_unlock_iothread();
123
+ goto out;
124
+ }
125
+
126
vmstate_loading = false;
127
vm_start();
128
trace_colo_vm_state_change("stop", "run");
129
diff --git a/migration/migration.c b/migration/migration.c
130
index XXXXXXX..XXXXXXX 100644
131
--- a/migration/migration.c
132
+++ b/migration/migration.c
133
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
134
MigrationIncomingState *mis = migration_incoming_get_current();
135
PostcopyState ps;
136
int ret;
137
+ Error *local_err = NULL;
138
139
assert(mis->from_src_file);
140
mis->migration_incoming_co = qemu_coroutine_self();
141
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
142
143
/* we get COLO info, and know if we are in COLO mode */
144
if (!ret && migration_incoming_enable_colo()) {
145
+ /* Make sure all file formats flush their mutable metadata */
146
+ bdrv_invalidate_cache_all(&local_err);
147
+ if (local_err) {
148
+ migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
149
+ MIGRATION_STATUS_FAILED);
150
+ error_report_err(local_err);
151
+ exit(EXIT_FAILURE);
152
+ }
153
+
154
qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
155
colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
156
mis->have_colo_incoming_thread = true;
157
--
158
2.17.1
159
160
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
We need to know if migration is going into COLO state for
4
incoming side before start normal migration.
5
6
Instead of using the VMStateDescription to send colo_state
7
from source side to destination side, we use MIG_CMD_ENABLE_COLO
8
to indicate whether COLO is enabled or not.
9
10
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
11
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
12
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
13
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
include/migration/colo.h | 5 +--
17
migration/Makefile.objs | 2 +-
18
migration/colo-comm.c | 76 ----------------------------------------
19
migration/colo.c | 13 ++++++-
20
migration/migration.c | 23 +++++++++++-
21
migration/savevm.c | 17 +++++++++
22
migration/savevm.h | 1 +
23
migration/trace-events | 1 +
24
vl.c | 2 --
25
9 files changed, 57 insertions(+), 83 deletions(-)
26
delete mode 100644 migration/colo-comm.c
27
28
diff --git a/include/migration/colo.h b/include/migration/colo.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/include/migration/colo.h
31
+++ b/include/migration/colo.h
32
@@ -XXX,XX +XXX,XX @@ void migrate_start_colo_process(MigrationState *s);
33
bool migration_in_colo_state(void);
34
35
/* loadvm */
36
-bool migration_incoming_enable_colo(void);
37
-void migration_incoming_exit_colo(void);
38
+void migration_incoming_enable_colo(void);
39
+void migration_incoming_disable_colo(void);
40
+bool migration_incoming_colo_enabled(void);
41
void *colo_process_incoming_thread(void *opaque);
42
bool migration_incoming_in_colo_state(void);
43
44
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
45
index XXXXXXX..XXXXXXX 100644
46
--- a/migration/Makefile.objs
47
+++ b/migration/Makefile.objs
48
@@ -XXX,XX +XXX,XX @@
49
common-obj-y += migration.o socket.o fd.o exec.o
50
common-obj-y += tls.o channel.o savevm.o
51
-common-obj-y += colo-comm.o colo.o colo-failover.o
52
+common-obj-y += colo.o colo-failover.o
53
common-obj-y += vmstate.o vmstate-types.o page_cache.o
54
common-obj-y += qemu-file.o global_state.o
55
common-obj-y += qemu-file-channel.o
56
diff --git a/migration/colo-comm.c b/migration/colo-comm.c
57
deleted file mode 100644
58
index XXXXXXX..XXXXXXX
59
--- a/migration/colo-comm.c
60
+++ /dev/null
61
@@ -XXX,XX +XXX,XX @@
62
-/*
63
- * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
64
- * (a.k.a. Fault Tolerance or Continuous Replication)
65
- *
66
- * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
67
- * Copyright (c) 2016 FUJITSU LIMITED
68
- * Copyright (c) 2016 Intel Corporation
69
- *
70
- * This work is licensed under the terms of the GNU GPL, version 2 or
71
- * later. See the COPYING file in the top-level directory.
72
- *
73
- */
74
-
75
-#include "qemu/osdep.h"
76
-#include "migration.h"
77
-#include "migration/colo.h"
78
-#include "migration/vmstate.h"
79
-#include "trace.h"
80
-
81
-typedef struct {
82
- bool colo_requested;
83
-} COLOInfo;
84
-
85
-static COLOInfo colo_info;
86
-
87
-COLOMode get_colo_mode(void)
88
-{
89
- if (migration_in_colo_state()) {
90
- return COLO_MODE_PRIMARY;
91
- } else if (migration_incoming_in_colo_state()) {
92
- return COLO_MODE_SECONDARY;
93
- } else {
94
- return COLO_MODE_UNKNOWN;
95
- }
96
-}
97
-
98
-static int colo_info_pre_save(void *opaque)
99
-{
100
- COLOInfo *s = opaque;
101
-
102
- s->colo_requested = migrate_colo_enabled();
103
-
104
- return 0;
105
-}
106
-
107
-static bool colo_info_need(void *opaque)
108
-{
109
- return migrate_colo_enabled();
110
-}
111
-
112
-static const VMStateDescription colo_state = {
113
- .name = "COLOState",
114
- .version_id = 1,
115
- .minimum_version_id = 1,
116
- .pre_save = colo_info_pre_save,
117
- .needed = colo_info_need,
118
- .fields = (VMStateField[]) {
119
- VMSTATE_BOOL(colo_requested, COLOInfo),
120
- VMSTATE_END_OF_LIST()
121
- },
122
-};
123
-
124
-void colo_info_init(void)
125
-{
126
- vmstate_register(NULL, 0, &colo_state, &colo_info);
127
-}
128
-
129
-bool migration_incoming_enable_colo(void)
130
-{
131
- return colo_info.colo_requested;
132
-}
133
-
134
-void migration_incoming_exit_colo(void)
135
-{
136
- colo_info.colo_requested = false;
137
-}
138
diff --git a/migration/colo.c b/migration/colo.c
139
index XXXXXXX..XXXXXXX 100644
140
--- a/migration/colo.c
141
+++ b/migration/colo.c
142
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
143
qemu_sem_post(&s->colo_exit_sem);
144
}
145
146
+COLOMode get_colo_mode(void)
147
+{
148
+ if (migration_in_colo_state()) {
149
+ return COLO_MODE_PRIMARY;
150
+ } else if (migration_incoming_in_colo_state()) {
151
+ return COLO_MODE_SECONDARY;
152
+ } else {
153
+ return COLO_MODE_UNKNOWN;
154
+ }
155
+}
156
+
157
void colo_do_failover(MigrationState *s)
158
{
159
/* Make sure VM stopped while failover happened. */
160
@@ -XXX,XX +XXX,XX @@ out:
161
if (mis->to_src_file) {
162
qemu_fclose(mis->to_src_file);
163
}
164
- migration_incoming_exit_colo();
165
+ migration_incoming_disable_colo();
166
167
rcu_unregister_thread();
168
return NULL;
169
diff --git a/migration/migration.c b/migration/migration.c
170
index XXXXXXX..XXXXXXX 100644
171
--- a/migration/migration.c
172
+++ b/migration/migration.c
173
@@ -XXX,XX +XXX,XX @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
174
return migrate_send_rp_message(mis, msg_type, msglen, bufc);
175
}
176
177
+static bool migration_colo_enabled;
178
+bool migration_incoming_colo_enabled(void)
179
+{
180
+ return migration_colo_enabled;
181
+}
182
+
183
+void migration_incoming_disable_colo(void)
184
+{
185
+ migration_colo_enabled = false;
186
+}
187
+
188
+void migration_incoming_enable_colo(void)
189
+{
190
+ migration_colo_enabled = true;
191
+}
192
+
193
void qemu_start_incoming_migration(const char *uri, Error **errp)
194
{
195
const char *p;
196
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
197
}
198
199
/* we get COLO info, and know if we are in COLO mode */
200
- if (!ret && migration_incoming_enable_colo()) {
201
+ if (!ret && migration_incoming_colo_enabled()) {
202
/* Make sure all file formats flush their mutable metadata */
203
bdrv_invalidate_cache_all(&local_err);
204
if (local_err) {
205
@@ -XXX,XX +XXX,XX @@ static void *migration_thread(void *opaque)
206
qemu_savevm_send_postcopy_advise(s->to_dst_file);
207
}
208
209
+ if (migrate_colo_enabled()) {
210
+ /* Notify migration destination that we enable COLO */
211
+ qemu_savevm_send_colo_enable(s->to_dst_file);
212
+ }
213
+
214
qemu_savevm_state_setup(s->to_dst_file);
215
216
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
217
diff --git a/migration/savevm.c b/migration/savevm.c
218
index XXXXXXX..XXXXXXX 100644
219
--- a/migration/savevm.c
220
+++ b/migration/savevm.c
221
@@ -XXX,XX +XXX,XX @@
222
#include "io/channel-file.h"
223
#include "sysemu/replay.h"
224
#include "qjson.h"
225
+#include "migration/colo.h"
226
227
#ifndef ETH_P_RARP
228
#define ETH_P_RARP 0x8035
229
@@ -XXX,XX +XXX,XX @@ enum qemu_vm_cmd {
230
were previously sent during
231
precopy but are dirty. */
232
MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream */
233
+ MIG_CMD_ENABLE_COLO, /* Enable COLO */
234
MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */
235
MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */
236
MIG_CMD_MAX
237
@@ -XXX,XX +XXX,XX @@ static void qemu_savevm_command_send(QEMUFile *f,
238
qemu_fflush(f);
239
}
240
241
+void qemu_savevm_send_colo_enable(QEMUFile *f)
242
+{
243
+ trace_savevm_send_colo_enable();
244
+ qemu_savevm_command_send(f, MIG_CMD_ENABLE_COLO, 0, NULL);
245
+}
246
+
247
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value)
248
{
249
uint32_t buf;
250
@@ -XXX,XX +XXX,XX @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
251
return 0;
252
}
253
254
+static int loadvm_process_enable_colo(MigrationIncomingState *mis)
255
+{
256
+ migration_incoming_enable_colo();
257
+ return 0;
258
+}
259
+
260
/*
261
* Process an incoming 'QEMU_VM_COMMAND'
262
* 0 just a normal return
263
@@ -XXX,XX +XXX,XX @@ static int loadvm_process_command(QEMUFile *f)
264
265
case MIG_CMD_RECV_BITMAP:
266
return loadvm_handle_recv_bitmap(mis, len);
267
+
268
+ case MIG_CMD_ENABLE_COLO:
269
+ return loadvm_process_enable_colo(mis);
270
}
271
272
return 0;
273
diff --git a/migration/savevm.h b/migration/savevm.h
274
index XXXXXXX..XXXXXXX 100644
275
--- a/migration/savevm.h
276
+++ b/migration/savevm.h
277
@@ -XXX,XX +XXX,XX @@ void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
278
uint16_t len,
279
uint64_t *start_list,
280
uint64_t *length_list);
281
+void qemu_savevm_send_colo_enable(QEMUFile *f);
282
283
int qemu_loadvm_state(QEMUFile *f);
284
void qemu_loadvm_state_cleanup(void);
285
diff --git a/migration/trace-events b/migration/trace-events
286
index XXXXXXX..XXXXXXX 100644
287
--- a/migration/trace-events
288
+++ b/migration/trace-events
289
@@ -XXX,XX +XXX,XX @@ savevm_send_ping(uint32_t val) "0x%x"
290
savevm_send_postcopy_listen(void) ""
291
savevm_send_postcopy_run(void) ""
292
savevm_send_postcopy_resume(void) ""
293
+savevm_send_colo_enable(void) ""
294
savevm_send_recv_bitmap(char *name) "%s"
295
savevm_state_setup(void) ""
296
savevm_state_resume_prepare(void) ""
297
diff --git a/vl.c b/vl.c
298
index XXXXXXX..XXXXXXX 100644
299
--- a/vl.c
300
+++ b/vl.c
301
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
302
#endif
303
}
304
305
- colo_info_init();
306
-
307
if (net_init_clients(&err) < 0) {
308
error_report_err(err);
309
exit(1);
310
--
311
2.17.1
312
313
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
We should not load PVM's state directly into SVM, because there may be some
4
errors while SVM is receiving data, which will break SVM.
5
6
We need to ensure all data is received before loading the state into SVM. We use
7
an extra memory to cache these data (PVM's ram). The ram cache in secondary side
8
is initially the same as SVM/PVM's memory. And in the process of checkpoint,
9
we cache the dirty pages of PVM into this ram cache firstly, so this ram cache
10
is always the same as PVM's memory at every checkpoint, then we flush this cached ram
11
to SVM after we receive all PVM's state.
12
13
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
14
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
15
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
16
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
17
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
18
Signed-off-by: Jason Wang <jasowang@redhat.com>
19
---
20
include/exec/ram_addr.h | 1 +
21
migration/migration.c | 7 ++++
22
migration/ram.c | 83 ++++++++++++++++++++++++++++++++++++++++-
23
migration/ram.h | 4 ++
24
migration/savevm.c | 2 +-
25
5 files changed, 94 insertions(+), 3 deletions(-)
26
27
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/exec/ram_addr.h
30
+++ b/include/exec/ram_addr.h
31
@@ -XXX,XX +XXX,XX @@ struct RAMBlock {
32
struct rcu_head rcu;
33
struct MemoryRegion *mr;
34
uint8_t *host;
35
+ uint8_t *colo_cache; /* For colo, VM's ram cache */
36
ram_addr_t offset;
37
ram_addr_t used_length;
38
ram_addr_t max_length;
39
diff --git a/migration/migration.c b/migration/migration.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/migration/migration.c
42
+++ b/migration/migration.c
43
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
44
exit(EXIT_FAILURE);
45
}
46
47
+ if (colo_init_ram_cache() < 0) {
48
+ error_report("Init ram cache failed");
49
+ exit(EXIT_FAILURE);
50
+ }
51
+
52
qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
53
colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
54
mis->have_colo_incoming_thread = true;
55
@@ -XXX,XX +XXX,XX @@ static void process_incoming_migration_co(void *opaque)
56
57
/* Wait checkpoint incoming thread exit before free resource */
58
qemu_thread_join(&mis->colo_incoming_thread);
59
+ /* We hold the global iothread lock, so it is safe here */
60
+ colo_release_ram_cache();
61
}
62
63
if (ret < 0) {
64
diff --git a/migration/ram.c b/migration/ram.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/migration/ram.c
67
+++ b/migration/ram.c
68
@@ -XXX,XX +XXX,XX @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
69
return block->host + offset;
70
}
71
72
+static inline void *colo_cache_from_block_offset(RAMBlock *block,
73
+ ram_addr_t offset)
74
+{
75
+ if (!offset_in_ramblock(block, offset)) {
76
+ return NULL;
77
+ }
78
+ if (!block->colo_cache) {
79
+ error_report("%s: colo_cache is NULL in block :%s",
80
+ __func__, block->idstr);
81
+ return NULL;
82
+ }
83
+ return block->colo_cache + offset;
84
+}
85
+
86
/**
87
* ram_handle_compressed: handle the zero page case
88
*
89
@@ -XXX,XX +XXX,XX @@ static void decompress_data_with_multi_threads(QEMUFile *f,
90
qemu_mutex_unlock(&decomp_done_lock);
91
}
92
93
+/*
94
+ * colo cache: this is for secondary VM, we cache the whole
95
+ * memory of the secondary VM, it is need to hold the global lock
96
+ * to call this helper.
97
+ */
98
+int colo_init_ram_cache(void)
99
+{
100
+ RAMBlock *block;
101
+
102
+ rcu_read_lock();
103
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
104
+ block->colo_cache = qemu_anon_ram_alloc(block->used_length,
105
+ NULL,
106
+ false);
107
+ if (!block->colo_cache) {
108
+ error_report("%s: Can't alloc memory for COLO cache of block %s,"
109
+ "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
110
+ block->used_length);
111
+ goto out_locked;
112
+ }
113
+ memcpy(block->colo_cache, block->host, block->used_length);
114
+ }
115
+ rcu_read_unlock();
116
+ return 0;
117
+
118
+out_locked:
119
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
120
+ if (block->colo_cache) {
121
+ qemu_anon_ram_free(block->colo_cache, block->used_length);
122
+ block->colo_cache = NULL;
123
+ }
124
+ }
125
+
126
+ rcu_read_unlock();
127
+ return -errno;
128
+}
129
+
130
+/* It is need to hold the global lock to call this helper */
131
+void colo_release_ram_cache(void)
132
+{
133
+ RAMBlock *block;
134
+
135
+ rcu_read_lock();
136
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
137
+ if (block->colo_cache) {
138
+ qemu_anon_ram_free(block->colo_cache, block->used_length);
139
+ block->colo_cache = NULL;
140
+ }
141
+ }
142
+ rcu_read_unlock();
143
+}
144
+
145
/**
146
* ram_load_setup: Setup RAM for migration incoming side
147
*
148
@@ -XXX,XX +XXX,XX @@ static int ram_load_setup(QEMUFile *f, void *opaque)
149
150
xbzrle_load_setup();
151
ramblock_recv_map_init();
152
+
153
return 0;
154
}
155
156
@@ -XXX,XX +XXX,XX @@ static int ram_load_cleanup(void *opaque)
157
g_free(rb->receivedmap);
158
rb->receivedmap = NULL;
159
}
160
+
161
return 0;
162
}
163
164
@@ -XXX,XX +XXX,XX @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
165
RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
166
RAMBlock *block = ram_block_from_stream(f, flags);
167
168
- host = host_from_ram_block_offset(block, addr);
169
+ /*
170
+ * After going into COLO, we should load the Page into colo_cache.
171
+ */
172
+ if (migration_incoming_in_colo_state()) {
173
+ host = colo_cache_from_block_offset(block, addr);
174
+ } else {
175
+ host = host_from_ram_block_offset(block, addr);
176
+ }
177
if (!host) {
178
error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
179
ret = -EINVAL;
180
break;
181
}
182
- ramblock_recv_bitmap_set(block, host);
183
+
184
+ if (!migration_incoming_in_colo_state()) {
185
+ ramblock_recv_bitmap_set(block, host);
186
+ }
187
+
188
trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
189
}
190
191
diff --git a/migration/ram.h b/migration/ram.h
192
index XXXXXXX..XXXXXXX 100644
193
--- a/migration/ram.h
194
+++ b/migration/ram.h
195
@@ -XXX,XX +XXX,XX @@ int64_t ramblock_recv_bitmap_send(QEMUFile *file,
196
const char *block_name);
197
int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
198
199
+/* ram cache */
200
+int colo_init_ram_cache(void);
201
+void colo_release_ram_cache(void);
202
+
203
#endif
204
diff --git a/migration/savevm.c b/migration/savevm.c
205
index XXXXXXX..XXXXXXX 100644
206
--- a/migration/savevm.c
207
+++ b/migration/savevm.c
208
@@ -XXX,XX +XXX,XX @@ static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis,
209
static int loadvm_process_enable_colo(MigrationIncomingState *mis)
210
{
211
migration_incoming_enable_colo();
212
- return 0;
213
+ return colo_init_ram_cache();
214
}
215
216
/*
217
--
218
2.17.1
219
220
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
We record the addresses of the dirty pages that were received;
4
this will help flush the cached pages into SVM.
5
6
Here, it is a trick, we record dirty pages by re-using migration
7
dirty bitmap. In the later patch, we will start the dirty log
8
for SVM, just like migration, in this way, we can record both
9
the dirty pages caused by PVM and SVM, we only flush those dirty
10
pages from the RAM cache while doing a checkpoint.
11
12
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
13
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
14
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
15
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
18
migration/ram.c | 43 ++++++++++++++++++++++++++++++++++++++++---
19
1 file changed, 40 insertions(+), 3 deletions(-)
20
21
diff --git a/migration/ram.c b/migration/ram.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/migration/ram.c
24
+++ b/migration/ram.c
25
@@ -XXX,XX +XXX,XX @@ static inline void *colo_cache_from_block_offset(RAMBlock *block,
26
__func__, block->idstr);
27
return NULL;
28
}
29
+
30
+ /*
31
+ * During colo checkpoint, we need bitmap of these migrated pages.
32
+ * It help us to decide which pages in ram cache should be flushed
33
+ * into VM's RAM later.
34
+ */
35
+ if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
36
+ ram_state->migration_dirty_pages++;
37
+ }
38
return block->colo_cache + offset;
39
}
40
41
@@ -XXX,XX +XXX,XX @@ int colo_init_ram_cache(void)
42
RAMBlock *block;
43
44
rcu_read_lock();
45
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
46
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
47
block->colo_cache = qemu_anon_ram_alloc(block->used_length,
48
NULL,
49
false);
50
@@ -XXX,XX +XXX,XX @@ int colo_init_ram_cache(void)
51
memcpy(block->colo_cache, block->host, block->used_length);
52
}
53
rcu_read_unlock();
54
+ /*
55
+ * Record the dirty pages that sent by PVM, we use this dirty bitmap together
56
+ * with to decide which page in cache should be flushed into SVM's RAM. Here
57
+ * we use the same name 'ram_bitmap' as for migration.
58
+ */
59
+ if (ram_bytes_total()) {
60
+ RAMBlock *block;
61
+
62
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
63
+ unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
64
+
65
+ block->bmap = bitmap_new(pages);
66
+ bitmap_set(block->bmap, 0, pages);
67
+ }
68
+ }
69
+ ram_state = g_new0(RAMState, 1);
70
+ ram_state->migration_dirty_pages = 0;
71
+
72
return 0;
73
74
out_locked:
75
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
76
+
77
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
78
if (block->colo_cache) {
79
qemu_anon_ram_free(block->colo_cache, block->used_length);
80
block->colo_cache = NULL;
81
@@ -XXX,XX +XXX,XX @@ void colo_release_ram_cache(void)
82
{
83
RAMBlock *block;
84
85
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
86
+ g_free(block->bmap);
87
+ block->bmap = NULL;
88
+ }
89
+
90
rcu_read_lock();
91
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
92
+
93
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
94
if (block->colo_cache) {
95
qemu_anon_ram_free(block->colo_cache, block->used_length);
96
block->colo_cache = NULL;
97
}
98
}
99
+
100
rcu_read_unlock();
101
+ g_free(ram_state);
102
+ ram_state = NULL;
103
}
104
105
/**
106
--
107
2.17.1
108
109
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
During the time of VM's running, PVM may dirty some pages, we will transfer
4
PVM's dirty pages to SVM and store them into SVM's RAM cache at next checkpoint
5
time. So, the content of SVM's RAM cache will always be same with PVM's memory
6
after checkpoint.
7
8
Instead of flushing all content of PVM's RAM cache into SVM's MEMORY,
9
we do this in a more efficient way:
10
Only flush pages that were dirtied by PVM since the last checkpoint.
11
In this way, we can ensure SVM's memory same with PVM's.
12
13
Besides, we must ensure the RAM cache is flushed before loading the device state.
14
15
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
16
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
17
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
18
Signed-off-by: Jason Wang <jasowang@redhat.com>
19
---
20
migration/ram.c | 37 +++++++++++++++++++++++++++++++++++++
21
migration/trace-events | 2 ++
22
2 files changed, 39 insertions(+)
23
24
diff --git a/migration/ram.c b/migration/ram.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/migration/ram.c
27
+++ b/migration/ram.c
28
@@ -XXX,XX +XXX,XX @@ static bool postcopy_is_running(void)
29
return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
30
}
31
32
+/*
33
+ * Flush content of RAM cache into SVM's memory.
34
+ * Only flush the pages that be dirtied by PVM or SVM or both.
35
+ */
36
+static void colo_flush_ram_cache(void)
37
+{
38
+ RAMBlock *block = NULL;
39
+ void *dst_host;
40
+ void *src_host;
41
+ unsigned long offset = 0;
42
+
43
+ trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
44
+ rcu_read_lock();
45
+ block = QLIST_FIRST_RCU(&ram_list.blocks);
46
+
47
+ while (block) {
48
+ offset = migration_bitmap_find_dirty(ram_state, block, offset);
49
+
50
+ if (offset << TARGET_PAGE_BITS >= block->used_length) {
51
+ offset = 0;
52
+ block = QLIST_NEXT_RCU(block, next);
53
+ } else {
54
+ migration_bitmap_clear_dirty(ram_state, block, offset);
55
+ dst_host = block->host + (offset << TARGET_PAGE_BITS);
56
+ src_host = block->colo_cache + (offset << TARGET_PAGE_BITS);
57
+ memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
58
+ }
59
+ }
60
+
61
+ rcu_read_unlock();
62
+ trace_colo_flush_ram_cache_end();
63
+}
64
+
65
static int ram_load(QEMUFile *f, void *opaque, int version_id)
66
{
67
int flags = 0, ret = 0, invalid_flags = 0;
68
@@ -XXX,XX +XXX,XX @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
69
ret |= wait_for_decompress_done();
70
rcu_read_unlock();
71
trace_ram_load_complete(ret, seq_iter);
72
+
73
+ if (!ret && migration_incoming_in_colo_state()) {
74
+ colo_flush_ram_cache();
75
+ }
76
return ret;
77
}
78
79
diff --git a/migration/trace-events b/migration/trace-events
80
index XXXXXXX..XXXXXXX 100644
81
--- a/migration/trace-events
82
+++ b/migration/trace-events
83
@@ -XXX,XX +XXX,XX @@ ram_dirty_bitmap_sync_start(void) ""
84
ram_dirty_bitmap_sync_wait(void) ""
85
ram_dirty_bitmap_sync_complete(void) ""
86
ram_state_resume_prepare(uint64_t v) "%" PRId64
87
+colo_flush_ram_cache_begin(uint64_t dirty_pages) "dirty_pages %" PRIu64
88
+colo_flush_ram_cache_end(void) ""
89
90
# migration/migration.c
91
await_return_path_close_on_source_close(void) ""
92
--
93
2.17.1
94
95
diff view generated by jsdifflib
Deleted patch
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
2
1
3
If some errors happen during VM's COLO FT stage, it's important to
4
notify the users of this event. Together with 'x-colo-lost-heartbeat',
5
users can intervene in COLO's failover work immediately.
6
If users don't want to get involved in COLO's failover verdict,
7
it is still necessary to notify users that we exited COLO mode.
8
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
11
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
12
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
migration/colo.c | 31 +++++++++++++++++++++++++++++++
16
qapi/migration.json | 38 ++++++++++++++++++++++++++++++++++++++
17
2 files changed, 69 insertions(+)
18
19
diff --git a/migration/colo.c b/migration/colo.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/migration/colo.c
22
+++ b/migration/colo.c
23
@@ -XXX,XX +XXX,XX @@
24
#include "net/colo-compare.h"
25
#include "net/colo.h"
26
#include "block/block.h"
27
+#include "qapi/qapi-events-migration.h"
28
29
static bool vmstate_loading;
30
static Notifier packets_compare_notifier;
31
@@ -XXX,XX +XXX,XX @@ out:
32
qemu_fclose(fb);
33
}
34
35
+ /*
36
+ * There are only two reasons we can get here, some error happened
37
+ * or the user triggered failover.
38
+ */
39
+ switch (failover_get_state()) {
40
+ case FAILOVER_STATUS_NONE:
41
+ qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
42
+ COLO_EXIT_REASON_ERROR);
43
+ break;
44
+ case FAILOVER_STATUS_REQUIRE:
45
+ qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
46
+ COLO_EXIT_REASON_REQUEST);
47
+ break;
48
+ default:
49
+ abort();
50
+ }
51
+
52
/* Hope this not to be too long to wait here */
53
qemu_sem_wait(&s->colo_exit_sem);
54
qemu_sem_destroy(&s->colo_exit_sem);
55
@@ -XXX,XX +XXX,XX @@ out:
56
error_report_err(local_err);
57
}
58
59
+ switch (failover_get_state()) {
60
+ case FAILOVER_STATUS_NONE:
61
+ qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
62
+ COLO_EXIT_REASON_ERROR);
63
+ break;
64
+ case FAILOVER_STATUS_REQUIRE:
65
+ qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
66
+ COLO_EXIT_REASON_REQUEST);
67
+ break;
68
+ default:
69
+ abort();
70
+ }
71
+
72
if (fb) {
73
qemu_fclose(fb);
74
}
75
diff --git a/qapi/migration.json b/qapi/migration.json
76
index XXXXXXX..XXXXXXX 100644
77
--- a/qapi/migration.json
78
+++ b/qapi/migration.json
79
@@ -XXX,XX +XXX,XX @@
80
{ 'enum': 'FailoverStatus',
81
'data': [ 'none', 'require', 'active', 'completed', 'relaunch' ] }
82
83
+##
84
+# @COLO_EXIT:
85
+#
86
+# Emitted when VM finishes COLO mode due to some errors happening or
87
+# at the request of users.
88
+#
89
+# @mode: report COLO mode when COLO exited.
90
+#
91
+# @reason: describes the reason for the COLO exit.
92
+#
93
+# Since: 3.1
94
+#
95
+# Example:
96
+#
97
+# <- { "timestamp": {"seconds": 2032141960, "microseconds": 417172},
98
+# "event": "COLO_EXIT", "data": {"mode": "primary", "reason": "request" } }
99
+#
100
+##
101
+{ 'event': 'COLO_EXIT',
102
+ 'data': {'mode': 'COLOMode', 'reason': 'COLOExitReason' } }
103
+
104
+##
105
+# @COLOExitReason:
106
+#
107
+# The reason for a COLO exit
108
+#
109
+# @none: no failover has ever happened. This can't occur in the
110
+# COLO_EXIT event, only in the result of query-colo-status.
111
+#
112
+# @request: COLO exit is due to an external request
113
+#
114
+# @error: COLO exit is due to an internal error
115
+#
116
+# Since: 3.1
117
+##
118
+{ 'enum': 'COLOExitReason',
119
+ 'data': [ 'none', 'request', 'error' ] }
120
+
121
##
122
# @x-colo-lost-heartbeat:
123
#
124
--
125
2.17.1
126
127
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <chen.zhang@intel.com>
2
1
3
As suggested by Markus Armbruster, rename the COLO "unknown" mode to "none".
4
5
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
6
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Markus Armbruster <armbru@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
migration/colo-failover.c | 2 +-
12
migration/colo.c | 2 +-
13
qapi/migration.json | 10 +++++-----
14
3 files changed, 7 insertions(+), 7 deletions(-)
15
16
diff --git a/migration/colo-failover.c b/migration/colo-failover.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/migration/colo-failover.c
19
+++ b/migration/colo-failover.c
20
@@ -XXX,XX +XXX,XX @@ FailoverStatus failover_get_state(void)
21
22
void qmp_x_colo_lost_heartbeat(Error **errp)
23
{
24
- if (get_colo_mode() == COLO_MODE_UNKNOWN) {
25
+ if (get_colo_mode() == COLO_MODE_NONE) {
26
error_setg(errp, QERR_FEATURE_DISABLED, "colo");
27
return;
28
}
29
diff --git a/migration/colo.c b/migration/colo.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/migration/colo.c
32
+++ b/migration/colo.c
33
@@ -XXX,XX +XXX,XX @@ COLOMode get_colo_mode(void)
34
} else if (migration_incoming_in_colo_state()) {
35
return COLO_MODE_SECONDARY;
36
} else {
37
- return COLO_MODE_UNKNOWN;
38
+ return COLO_MODE_NONE;
39
}
40
}
41
42
diff --git a/qapi/migration.json b/qapi/migration.json
43
index XXXXXXX..XXXXXXX 100644
44
--- a/qapi/migration.json
45
+++ b/qapi/migration.json
46
@@ -XXX,XX +XXX,XX @@
47
##
48
# @COLOMode:
49
#
50
-# The colo mode
51
+# The COLO current mode.
52
#
53
-# @unknown: unknown mode
54
+# @none: COLO is disabled.
55
#
56
-# @primary: master side
57
+# @primary: COLO node in primary side.
58
#
59
-# @secondary: slave side
60
+# @secondary: COLO node in slave side.
61
#
62
# Since: 2.8
63
##
64
{ 'enum': 'COLOMode',
65
- 'data': [ 'unknown', 'primary', 'secondary'] }
66
+ 'data': [ 'none', 'primary', 'secondary'] }
67
68
##
69
# @FailoverStatus:
70
--
71
2.17.1
72
73
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
Libvirt or other high-level software can use this command to query the COLO status.
4
You can test this command like this:
5
{'execute':'query-colo-status'}
6
7
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
migration/colo.c | 21 +++++++++++++++++++++
12
qapi/migration.json | 32 ++++++++++++++++++++++++++++++++
13
2 files changed, 53 insertions(+)
14
15
diff --git a/migration/colo.c b/migration/colo.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/migration/colo.c
18
+++ b/migration/colo.c
19
@@ -XXX,XX +XXX,XX @@
20
#include "net/colo.h"
21
#include "block/block.h"
22
#include "qapi/qapi-events-migration.h"
23
+#include "qapi/qmp/qerror.h"
24
25
static bool vmstate_loading;
26
static Notifier packets_compare_notifier;
27
@@ -XXX,XX +XXX,XX @@ void qmp_xen_colo_do_checkpoint(Error **errp)
28
#endif
29
}
30
31
+COLOStatus *qmp_query_colo_status(Error **errp)
32
+{
33
+ COLOStatus *s = g_new0(COLOStatus, 1);
34
+
35
+ s->mode = get_colo_mode();
36
+
37
+ switch (failover_get_state()) {
38
+ case FAILOVER_STATUS_NONE:
39
+ s->reason = COLO_EXIT_REASON_NONE;
40
+ break;
41
+ case FAILOVER_STATUS_REQUIRE:
42
+ s->reason = COLO_EXIT_REASON_REQUEST;
43
+ break;
44
+ default:
45
+ s->reason = COLO_EXIT_REASON_ERROR;
46
+ }
47
+
48
+ return s;
49
+}
50
+
51
static void colo_send_message(QEMUFile *f, COLOMessage msg,
52
Error **errp)
53
{
54
diff --git a/qapi/migration.json b/qapi/migration.json
55
index XXXXXXX..XXXXXXX 100644
56
--- a/qapi/migration.json
57
+++ b/qapi/migration.json
58
@@ -XXX,XX +XXX,XX @@
59
##
60
{ 'command': 'xen-colo-do-checkpoint' }
61
62
+##
63
+# @COLOStatus:
64
+#
65
+# The result format for 'query-colo-status'.
66
+#
67
+# @mode: COLO running mode. If COLO is running, this field will return
68
+# 'primary' or 'secondary'.
69
+#
70
+# @reason: describes the reason for the COLO exit.
71
+#
72
+# Since: 3.0
73
+##
74
+{ 'struct': 'COLOStatus',
75
+ 'data': { 'mode': 'COLOMode', 'reason': 'COLOExitReason' } }
76
+
77
+##
78
+# @query-colo-status:
79
+#
80
+# Query COLO status while the vm is running.
81
+#
82
+# Returns: A @COLOStatus object showing the status.
83
+#
84
+# Example:
85
+#
86
+# -> { "execute": "query-colo-status" }
87
+# <- { "return": { "mode": "primary", "active": true, "reason": "request" } }
88
+#
89
+# Since: 3.0
90
+##
91
+{ 'command': 'query-colo-status',
92
+ 'returns': 'COLOStatus' }
93
+
94
##
95
# @migrate-recover:
96
#
97
--
98
2.17.1
99
100
diff view generated by jsdifflib
1
From: Zhang Chen <zhangckid@gmail.com>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
There are several stages during loadvm/savevm process. In different stage,
3
When RSS is enabled the device tries to load the eBPF program
4
migration incoming processes different types of sections.
4
to select RX virtqueue in the TUN. If eBPF can be loaded
5
We want to control these stages more accurately, which will benefit COLO
5
the RSS will function also with vhost (works with kernel 5.8 and later).
6
performance, we don't have to save type of QEMU_VM_SECTION_START
6
Software RSS is used as a fallback with vhost=off when eBPF can't be loaded
7
sections every time we do a checkpoint; besides, we want to separate
7
or when hash population requested by the guest.
8
the process of saving/loading memory and devices state.
8
9
9
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
10
So we add two new helper functions: qemu_load_device_state() and
10
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
11
qemu_savevm_live_state() to achieve different process during migration.
12
13
Besides, we make qemu_loadvm_state_main() and qemu_save_device_state()
14
public, and simplify the codes of qemu_save_device_state() by calling the
15
wrapper qemu_savevm_state_header().
16
17
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
18
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
19
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
20
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
21
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
22
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
23
---
12
---
24
migration/colo.c | 41 ++++++++++++++++++++++++++++++++---------
13
hw/net/vhost_net.c | 3 ++
25
migration/savevm.c | 36 +++++++++++++++++++++++++++++-------
14
hw/net/virtio-net.c | 116 +++++++++++++++++++++++++++++++++++++++--
26
migration/savevm.h | 4 ++++
15
include/hw/virtio/virtio-net.h | 4 ++
27
3 files changed, 65 insertions(+), 16 deletions(-)
16
net/vhost-vdpa.c | 2 +
28
17
4 files changed, 122 insertions(+), 3 deletions(-)
29
diff --git a/migration/colo.c b/migration/colo.c
18
30
index XXXXXXX..XXXXXXX 100644
19
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
31
--- a/migration/colo.c
20
index XXXXXXX..XXXXXXX 100644
32
+++ b/migration/colo.c
21
--- a/hw/net/vhost_net.c
22
+++ b/hw/net/vhost_net.c
23
@@ -XXX,XX +XXX,XX @@ static const int kernel_feature_bits[] = {
24
VIRTIO_NET_F_MTU,
25
VIRTIO_F_IOMMU_PLATFORM,
26
VIRTIO_F_RING_PACKED,
27
+ VIRTIO_NET_F_HASH_REPORT,
28
VHOST_INVALID_FEATURE_BIT
29
};
30
31
@@ -XXX,XX +XXX,XX @@ static const int user_feature_bits[] = {
32
VIRTIO_NET_F_MTU,
33
VIRTIO_F_IOMMU_PLATFORM,
34
VIRTIO_F_RING_PACKED,
35
+ VIRTIO_NET_F_RSS,
36
+ VIRTIO_NET_F_HASH_REPORT,
37
38
/* This bit implies RARP isn't sent by QEMU out of band */
39
VIRTIO_NET_F_GUEST_ANNOUNCE,
40
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/hw/net/virtio-net.c
43
+++ b/hw/net/virtio-net.c
44
@@ -XXX,XX +XXX,XX @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
45
return features;
46
}
47
48
- virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
49
- virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
50
+ if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
51
+ virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
52
+ }
53
features = vhost_net_get_features(get_vhost_net(nc->peer), features);
54
vdev->backend_features = features;
55
56
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
57
}
58
}
59
60
+static void virtio_net_detach_epbf_rss(VirtIONet *n);
61
+
62
static void virtio_net_disable_rss(VirtIONet *n)
63
{
64
if (n->rss_data.enabled) {
65
trace_virtio_net_rss_disable();
66
}
67
n->rss_data.enabled = false;
68
+
69
+ virtio_net_detach_epbf_rss(n);
70
+}
71
+
72
+static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
73
+{
74
+ NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
75
+ if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
76
+ return false;
77
+ }
78
+
79
+ return nc->info->set_steering_ebpf(nc, prog_fd);
80
+}
81
+
82
+static void rss_data_to_rss_config(struct VirtioNetRssData *data,
83
+ struct EBPFRSSConfig *config)
84
+{
85
+ config->redirect = data->redirect;
86
+ config->populate_hash = data->populate_hash;
87
+ config->hash_types = data->hash_types;
88
+ config->indirections_len = data->indirections_len;
89
+ config->default_queue = data->default_queue;
90
+}
91
+
92
+static bool virtio_net_attach_epbf_rss(VirtIONet *n)
93
+{
94
+ struct EBPFRSSConfig config = {};
95
+
96
+ if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
97
+ return false;
98
+ }
99
+
100
+ rss_data_to_rss_config(&n->rss_data, &config);
101
+
102
+ if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
103
+ n->rss_data.indirections_table, n->rss_data.key)) {
104
+ return false;
105
+ }
106
+
107
+ if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
108
+ return false;
109
+ }
110
+
111
+ return true;
112
+}
113
+
114
+static void virtio_net_detach_epbf_rss(VirtIONet *n)
115
+{
116
+ virtio_net_attach_ebpf_to_backend(n->nic, -1);
117
+}
118
+
119
+static bool virtio_net_load_ebpf(VirtIONet *n)
120
+{
121
+ if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
122
+ /* backend does't support steering ebpf */
123
+ return false;
124
+ }
125
+
126
+ return ebpf_rss_load(&n->ebpf_rss);
127
+}
128
+
129
+static void virtio_net_unload_ebpf(VirtIONet *n)
130
+{
131
+ virtio_net_attach_ebpf_to_backend(n->nic, -1);
132
+ ebpf_rss_unload(&n->ebpf_rss);
133
}
134
135
static uint16_t virtio_net_handle_rss(VirtIONet *n,
136
@@ -XXX,XX +XXX,XX @@ static uint16_t virtio_net_handle_rss(VirtIONet *n,
137
goto error;
138
}
139
n->rss_data.enabled = true;
140
+
141
+ if (!n->rss_data.populate_hash) {
142
+ if (!virtio_net_attach_epbf_rss(n)) {
143
+ /* EBPF must be loaded for vhost */
144
+ if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
145
+ warn_report("Can't load eBPF RSS for vhost");
146
+ goto error;
147
+ }
148
+ /* fallback to software RSS */
149
+ warn_report("Can't load eBPF RSS - fallback to software RSS");
150
+ n->rss_data.enabled_software_rss = true;
151
+ }
152
+ } else {
153
+ /* use software RSS for hash populating */
154
+ /* and detach eBPF if was loaded before */
155
+ virtio_net_detach_epbf_rss(n);
156
+ n->rss_data.enabled_software_rss = true;
157
+ }
158
+
159
trace_virtio_net_rss_enable(n->rss_data.hash_types,
160
n->rss_data.indirections_len,
161
temp.b);
162
@@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
163
return -1;
164
}
165
166
- if (!no_rss && n->rss_data.enabled) {
167
+ if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
168
int index = virtio_net_process_rss(nc, buf, size);
169
if (index >= 0) {
170
NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
171
@@ -XXX,XX +XXX,XX @@ static int virtio_net_post_load_device(void *opaque, int version_id)
172
}
173
174
if (n->rss_data.enabled) {
175
+ n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
176
+ if (!n->rss_data.populate_hash) {
177
+ if (!virtio_net_attach_epbf_rss(n)) {
178
+ if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
179
+ warn_report("Can't post-load eBPF RSS for vhost");
180
+ } else {
181
+ warn_report("Can't post-load eBPF RSS - "
182
+ "fallback to software RSS");
183
+ n->rss_data.enabled_software_rss = true;
184
+ }
185
+ }
186
+ }
187
+
188
trace_virtio_net_rss_enable(n->rss_data.hash_types,
189
n->rss_data.indirections_len,
190
sizeof(n->rss_data.key));
191
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
192
n->qdev = dev;
193
194
net_rx_pkt_init(&n->rx_pkt, false);
195
+
196
+ if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
197
+ virtio_net_load_ebpf(n);
198
+ }
199
}
200
201
static void virtio_net_device_unrealize(DeviceState *dev)
202
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_unrealize(DeviceState *dev)
203
VirtIONet *n = VIRTIO_NET(dev);
204
int i, max_queues;
205
206
+ if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
207
+ virtio_net_unload_ebpf(n);
208
+ }
209
+
210
/* This will stop vhost backend if appropriate. */
211
virtio_net_set_status(vdev, 0);
212
213
@@ -XXX,XX +XXX,XX @@ static void virtio_net_instance_init(Object *obj)
214
device_add_bootindex_property(obj, &n->nic_conf.bootindex,
215
"bootindex", "/ethernet-phy@0",
216
DEVICE(n));
217
+
218
+ ebpf_rss_init(&n->ebpf_rss);
219
}
220
221
static int virtio_net_pre_save(void *opaque)
222
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
223
index XXXXXXX..XXXXXXX 100644
224
--- a/include/hw/virtio/virtio-net.h
225
+++ b/include/hw/virtio/virtio-net.h
33
@@ -XXX,XX +XXX,XX @@
226
@@ -XXX,XX +XXX,XX @@
34
#include "block/block.h"
227
#include "qemu/option_int.h"
35
#include "qapi/qapi-events-migration.h"
228
#include "qom/object.h"
36
#include "qapi/qmp/qerror.h"
229
37
+#include "sysemu/cpus.h"
230
+#include "ebpf/ebpf_rss.h"
38
231
+
39
static bool vmstate_loading;
232
#define TYPE_VIRTIO_NET "virtio-net-device"
40
static Notifier packets_compare_notifier;
233
OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
41
@@ -XXX,XX +XXX,XX @@ static int colo_do_checkpoint_transaction(MigrationState *s,
234
42
235
@@ -XXX,XX +XXX,XX @@ typedef struct VirtioNetRscChain {
43
/* Disable block migration */
236
44
migrate_set_block_enabled(false, &local_err);
237
typedef struct VirtioNetRssData {
45
- qemu_savevm_state_header(fb);
238
bool enabled;
46
- qemu_savevm_state_setup(fb);
239
+ bool enabled_software_rss;
47
qemu_mutex_lock_iothread();
240
bool redirect;
48
replication_do_checkpoint_all(&local_err);
241
bool populate_hash;
49
if (local_err) {
242
uint32_t hash_types;
50
qemu_mutex_unlock_iothread();
243
@@ -XXX,XX +XXX,XX @@ struct VirtIONet {
51
goto out;
244
Notifier migration_state;
52
}
245
VirtioNetRssData rss_data;
53
- qemu_savevm_state_complete_precopy(fb, false, false);
246
struct NetRxPkt *rx_pkt;
54
- qemu_mutex_unlock_iothread();
247
+ struct EBPFRSSContext ebpf_rss;
55
-
56
- qemu_fflush(fb);
57
58
colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
59
if (local_err) {
60
+ qemu_mutex_unlock_iothread();
61
+ goto out;
62
+ }
63
+ /* Note: device state is saved into buffer */
64
+ ret = qemu_save_device_state(fb);
65
+
66
+ qemu_mutex_unlock_iothread();
67
+ if (ret < 0) {
68
goto out;
69
}
70
+ /*
71
+ * Only save VM's live state, which not including device state.
72
+ * TODO: We may need a timeout mechanism to prevent COLO process
73
+ * to be blocked here.
74
+ */
75
+ qemu_savevm_live_state(s->to_dst_file);
76
+
77
+ qemu_fflush(fb);
78
+
79
/*
80
* We need the size of the VMstate data in Secondary side,
81
* With which we can decide how much data should be read.
82
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
83
uint64_t total_size;
84
uint64_t value;
85
Error *local_err = NULL;
86
+ int ret;
87
88
rcu_register_thread();
89
qemu_sem_init(&mis->colo_incoming_sem, 0);
90
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
91
goto out;
92
}
93
94
+ qemu_mutex_lock_iothread();
95
+ cpu_synchronize_all_pre_loadvm();
96
+ ret = qemu_loadvm_state_main(mis->from_src_file, mis);
97
+ qemu_mutex_unlock_iothread();
98
+
99
+ if (ret < 0) {
100
+ error_report("Load VM's live state (ram) error");
101
+ goto out;
102
+ }
103
+
104
value = colo_receive_message_value(mis->from_src_file,
105
COLO_MESSAGE_VMSTATE_SIZE, &local_err);
106
if (local_err) {
107
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
108
}
109
110
qemu_mutex_lock_iothread();
111
- qemu_system_reset(SHUTDOWN_CAUSE_NONE);
112
vmstate_loading = true;
113
- if (qemu_loadvm_state(fb) < 0) {
114
- error_report("COLO: loadvm failed");
115
+ ret = qemu_load_device_state(fb);
116
+ if (ret < 0) {
117
+ error_report("COLO: load device state failed");
118
qemu_mutex_unlock_iothread();
119
goto out;
120
}
121
diff --git a/migration/savevm.c b/migration/savevm.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/migration/savevm.c
124
+++ b/migration/savevm.c
125
@@ -XXX,XX +XXX,XX @@ done:
126
return ret;
127
}
128
129
-static int qemu_save_device_state(QEMUFile *f)
130
+void qemu_savevm_live_state(QEMUFile *f)
131
{
132
- SaveStateEntry *se;
133
+ /* save QEMU_VM_SECTION_END section */
134
+ qemu_savevm_state_complete_precopy(f, true, false);
135
+ qemu_put_byte(f, QEMU_VM_EOF);
136
+}
137
138
- qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
139
- qemu_put_be32(f, QEMU_VM_FILE_VERSION);
140
+int qemu_save_device_state(QEMUFile *f)
141
+{
142
+ SaveStateEntry *se;
143
144
+ if (!migration_in_colo_state()) {
145
+ qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
146
+ qemu_put_be32(f, QEMU_VM_FILE_VERSION);
147
+ }
148
cpu_synchronize_all_states();
149
150
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
151
@@ -XXX,XX +XXX,XX @@ enum LoadVMExitCodes {
152
LOADVM_QUIT = 1,
153
};
248
};
154
249
155
-static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
250
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
156
-
251
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
157
/* ------ incoming postcopy messages ------ */
252
index XXXXXXX..XXXXXXX 100644
158
/* 'advise' arrives before any transfers just to tell us that a postcopy
253
--- a/net/vhost-vdpa.c
159
* *might* happen - it might be skipped if precopy transferred everything
254
+++ b/net/vhost-vdpa.c
160
@@ -XXX,XX +XXX,XX @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
255
@@ -XXX,XX +XXX,XX @@ const int vdpa_feature_bits[] = {
161
return true;
256
VIRTIO_NET_F_MTU,
162
}
257
VIRTIO_F_IOMMU_PLATFORM,
163
258
VIRTIO_F_RING_PACKED,
164
-static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
259
+ VIRTIO_NET_F_RSS,
165
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
260
+ VIRTIO_NET_F_HASH_REPORT,
166
{
261
VIRTIO_NET_F_GUEST_ANNOUNCE,
167
uint8_t section_type;
262
VIRTIO_NET_F_STATUS,
168
int ret = 0;
263
VHOST_INVALID_FEATURE_BIT
169
@@ -XXX,XX +XXX,XX @@ int qemu_loadvm_state(QEMUFile *f)
170
return ret;
171
}
172
173
+int qemu_load_device_state(QEMUFile *f)
174
+{
175
+ MigrationIncomingState *mis = migration_incoming_get_current();
176
+ int ret;
177
+
178
+ /* Load QEMU_VM_SECTION_FULL section */
179
+ ret = qemu_loadvm_state_main(f, mis);
180
+ if (ret < 0) {
181
+ error_report("Failed to load device state: %d", ret);
182
+ return ret;
183
+ }
184
+
185
+ cpu_synchronize_all_post_init();
186
+ return 0;
187
+}
188
+
189
int save_snapshot(const char *name, Error **errp)
190
{
191
BlockDriverState *bs, *bs1;
192
diff --git a/migration/savevm.h b/migration/savevm.h
193
index XXXXXXX..XXXXXXX 100644
194
--- a/migration/savevm.h
195
+++ b/migration/savevm.h
196
@@ -XXX,XX +XXX,XX @@ void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name,
197
uint64_t *start_list,
198
uint64_t *length_list);
199
void qemu_savevm_send_colo_enable(QEMUFile *f);
200
+void qemu_savevm_live_state(QEMUFile *f);
201
+int qemu_save_device_state(QEMUFile *f);
202
203
int qemu_loadvm_state(QEMUFile *f);
204
void qemu_loadvm_state_cleanup(void);
205
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
206
+int qemu_load_device_state(QEMUFile *f);
207
208
#endif
209
--
264
--
210
2.17.1
265
2.7.4
211
266
212
267
diff view generated by jsdifflib
Deleted patch
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
2
1
3
There is no need to flush all of the VM's RAM from the cache; only
4
flush the pages dirtied since the last checkpoint.
5
6
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
7
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
migration/ram.c | 9 +++++++++
14
1 file changed, 9 insertions(+)
15
16
diff --git a/migration/ram.c b/migration/ram.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/migration/ram.c
19
+++ b/migration/ram.c
20
@@ -XXX,XX +XXX,XX @@ int colo_init_ram_cache(void)
21
}
22
ram_state = g_new0(RAMState, 1);
23
ram_state->migration_dirty_pages = 0;
24
+ memory_global_dirty_log_start();
25
26
return 0;
27
28
@@ -XXX,XX +XXX,XX @@ void colo_release_ram_cache(void)
29
{
30
RAMBlock *block;
31
32
+ memory_global_dirty_log_stop();
33
RAMBLOCK_FOREACH_MIGRATABLE(block) {
34
g_free(block->bmap);
35
block->bmap = NULL;
36
@@ -XXX,XX +XXX,XX @@ static void colo_flush_ram_cache(void)
37
void *src_host;
38
unsigned long offset = 0;
39
40
+ memory_global_dirty_log_sync();
41
+ rcu_read_lock();
42
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
43
+ migration_bitmap_sync_range(ram_state, block, 0, block->used_length);
44
+ }
45
+ rcu_read_unlock();
46
+
47
trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
48
rcu_read_lock();
49
block = QLIST_FIRST_RCU(&ram_list.blocks);
50
--
51
2.17.1
52
53
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangckid@gmail.com>
2
1
3
After one round of checkpoint, the states between PVM and SVM
4
become consistent, so it is unnecessary to adjust the sequence
5
of net packets for old connections. Besides, when failover
6
happens, filter-rewriter will enter failover mode, in which it needn't
7
handle new TCP connections.
8
9
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
10
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
11
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
net/colo-compare.c | 12 ++++-----
15
net/colo.c | 8 ++++++
16
net/colo.h | 2 ++
17
net/filter-rewriter.c | 57 +++++++++++++++++++++++++++++++++++++++++++
18
4 files changed, 73 insertions(+), 6 deletions(-)
19
20
diff --git a/net/colo-compare.c b/net/colo-compare.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/net/colo-compare.c
23
+++ b/net/colo-compare.c
24
@@ -XXX,XX +XXX,XX @@ enum {
25
SECONDARY_IN,
26
};
27
28
+static void colo_compare_inconsistency_notify(void)
29
+{
30
+ notifier_list_notify(&colo_compare_notifiers,
31
+ migrate_get_current());
32
+}
33
+
34
static int compare_chr_send(CompareState *s,
35
const uint8_t *buf,
36
uint32_t size,
37
@@ -XXX,XX +XXX,XX @@ static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
38
return false;
39
}
40
41
-static void colo_compare_inconsistency_notify(void)
42
-{
43
- notifier_list_notify(&colo_compare_notifiers,
44
- migrate_get_current());
45
-}
46
-
47
static void colo_compare_tcp(CompareState *s, Connection *conn)
48
{
49
Packet *ppkt = NULL, *spkt = NULL;
50
diff --git a/net/colo.c b/net/colo.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/net/colo.c
53
+++ b/net/colo.c
54
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
55
56
return conn;
57
}
58
+
59
+bool connection_has_tracked(GHashTable *connection_track_table,
60
+ ConnectionKey *key)
61
+{
62
+ Connection *conn = g_hash_table_lookup(connection_track_table, key);
63
+
64
+ return conn ? true : false;
65
+}
66
diff --git a/net/colo.h b/net/colo.h
67
index XXXXXXX..XXXXXXX 100644
68
--- a/net/colo.h
69
+++ b/net/colo.h
70
@@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque);
71
Connection *connection_get(GHashTable *connection_track_table,
72
ConnectionKey *key,
73
GQueue *conn_list);
74
+bool connection_has_tracked(GHashTable *connection_track_table,
75
+ ConnectionKey *key);
76
void connection_hashtable_reset(GHashTable *connection_track_table);
77
Packet *packet_new(const void *data, int size, int vnet_hdr_len);
78
void packet_destroy(void *opaque, void *user_data);
79
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/net/filter-rewriter.c
82
+++ b/net/filter-rewriter.c
83
@@ -XXX,XX +XXX,XX @@
84
#include "qemu/main-loop.h"
85
#include "qemu/iov.h"
86
#include "net/checksum.h"
87
+#include "net/colo.h"
88
+#include "migration/colo.h"
89
90
#define FILTER_COLO_REWRITER(obj) \
91
OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
92
93
#define TYPE_FILTER_REWRITER "filter-rewriter"
94
+#define FAILOVER_MODE_ON true
95
+#define FAILOVER_MODE_OFF false
96
97
typedef struct RewriterState {
98
NetFilterState parent_obj;
99
@@ -XXX,XX +XXX,XX @@ typedef struct RewriterState {
100
/* hashtable to save connection */
101
GHashTable *connection_track_table;
102
bool vnet_hdr;
103
+ bool failover_mode;
104
} RewriterState;
105
106
+static void filter_rewriter_failover_mode(RewriterState *s)
107
+{
108
+ s->failover_mode = FAILOVER_MODE_ON;
109
+}
110
+
111
static void filter_rewriter_flush(NetFilterState *nf)
112
{
113
RewriterState *s = FILTER_COLO_REWRITER(nf);
114
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
115
*/
116
reverse_connection_key(&key);
117
}
118
+
119
+ /* After failover we needn't change new TCP packet */
120
+ if (s->failover_mode &&
121
+ !connection_has_tracked(s->connection_track_table, &key)) {
122
+ goto out;
123
+ }
124
+
125
conn = connection_get(s->connection_track_table,
126
&key,
127
NULL);
128
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
129
}
130
}
131
132
+out:
133
packet_destroy(pkt, NULL);
134
pkt = NULL;
135
return 0;
136
}
137
138
+static void reset_seq_offset(gpointer key, gpointer value, gpointer user_data)
139
+{
140
+ Connection *conn = (Connection *)value;
141
+
142
+ conn->offset = 0;
143
+}
144
+
145
+static gboolean offset_is_nonzero(gpointer key,
146
+ gpointer value,
147
+ gpointer user_data)
148
+{
149
+ Connection *conn = (Connection *)value;
150
+
151
+ return conn->offset ? true : false;
152
+}
153
+
154
+static void colo_rewriter_handle_event(NetFilterState *nf, int event,
155
+ Error **errp)
156
+{
157
+ RewriterState *rs = FILTER_COLO_REWRITER(nf);
158
+
159
+ switch (event) {
160
+ case COLO_EVENT_CHECKPOINT:
161
+ g_hash_table_foreach(rs->connection_track_table,
162
+ reset_seq_offset, NULL);
163
+ break;
164
+ case COLO_EVENT_FAILOVER:
165
+ if (!g_hash_table_find(rs->connection_track_table,
166
+ offset_is_nonzero, NULL)) {
167
+ filter_rewriter_failover_mode(rs);
168
+ }
169
+ break;
170
+ default:
171
+ break;
172
+ }
173
+}
174
+
175
static void colo_rewriter_cleanup(NetFilterState *nf)
176
{
177
RewriterState *s = FILTER_COLO_REWRITER(nf);
178
@@ -XXX,XX +XXX,XX @@ static void filter_rewriter_init(Object *obj)
179
RewriterState *s = FILTER_COLO_REWRITER(obj);
180
181
s->vnet_hdr = false;
182
+ s->failover_mode = FAILOVER_MODE_OFF;
183
object_property_add_bool(obj, "vnet_hdr_support",
184
filter_rewriter_get_vnet_hdr,
185
filter_rewriter_set_vnet_hdr, NULL);
186
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_class_init(ObjectClass *oc, void *data)
187
nfc->setup = colo_rewriter_setup;
188
nfc->cleanup = colo_rewriter_cleanup;
189
nfc->receive_iov = colo_rewriter_receive_iov;
190
+ nfc->handle_event = colo_rewriter_handle_event;
191
}
192
193
static const TypeInfo colo_rewriter_info = {
194
--
195
2.17.1
196
197
diff view generated by jsdifflib
Deleted patch
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
2
1
3
Notify all net filters about the checkpoint and failover event.
4
5
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
6
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
migration/colo.c | 15 +++++++++++++++
10
1 file changed, 15 insertions(+)
11
12
diff --git a/migration/colo.c b/migration/colo.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/migration/colo.c
15
+++ b/migration/colo.c
16
@@ -XXX,XX +XXX,XX @@
17
#include "qapi/qapi-events-migration.h"
18
#include "qapi/qmp/qerror.h"
19
#include "sysemu/cpus.h"
20
+#include "net/filter.h"
21
22
static bool vmstate_loading;
23
static Notifier packets_compare_notifier;
24
@@ -XXX,XX +XXX,XX @@ static void secondary_vm_do_failover(void)
25
error_report_err(local_err);
26
}
27
28
+ /* Notify all filters of all NIC to do failover */
29
+ colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
30
+ if (local_err) {
31
+ error_report_err(local_err);
32
+ }
33
+
34
if (!autostart) {
35
error_report("\"-S\" qemu option will be ignored in secondary side");
36
/* recover runstate to normal migration finish state */
37
@@ -XXX,XX +XXX,XX @@ void *colo_process_incoming_thread(void *opaque)
38
goto out;
39
}
40
41
+ /* Notify all filters of all NIC to do checkpoint */
42
+ colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
43
+
44
+ if (local_err) {
45
+ qemu_mutex_unlock_iothread();
46
+ goto out;
47
+ }
48
+
49
vmstate_loading = false;
50
vm_start();
51
trace_colo_vm_state_change("stop", "run");
52
--
53
2.17.1
54
55
diff view generated by jsdifflib
1
There should not be a reason for passing a packet size greater than
1
From: Andrew Melnychenko <andrew@daynix.com>
2
INT_MAX. It's usually a hint of bug somewhere, so ignore packet size
3
greater than INT_MAX in qemu_deliver_packet_iov()
4
2
5
CC: qemu-stable@nongnu.org
3
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
6
Reported-by: Daniel Shapira <daniel@twistlock.com>
4
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
6
---
10
net/net.c | 7 ++++++-
7
docs/devel/ebpf_rss.rst | 125 ++++++++++++++++++++++++++++++++++++++++++++++++
11
1 file changed, 6 insertions(+), 1 deletion(-)
8
docs/devel/index.rst | 1 +
9
2 files changed, 126 insertions(+)
10
create mode 100644 docs/devel/ebpf_rss.rst
12
11
13
diff --git a/net/net.c b/net/net.c
12
diff --git a/docs/devel/ebpf_rss.rst b/docs/devel/ebpf_rss.rst
13
new file mode 100644
14
index XXXXXXX..XXXXXXX
15
--- /dev/null
16
+++ b/docs/devel/ebpf_rss.rst
17
@@ -XXX,XX +XXX,XX @@
18
+===========================
19
+eBPF RSS virtio-net support
20
+===========================
21
+
22
+RSS(Receive Side Scaling) is used to distribute network packets to guest virtqueues
23
+by calculating packet hash. Usually every queue is then processed by a specific guest CPU core.
24
+
25
+For now there are 2 RSS implementations in qemu:
26
+- 'in-qemu' RSS (functions if qemu receives network packets, i.e. vhost=off)
27
+- eBPF RSS (can also function with vhost=on)
28
+
29
+eBPF support (CONFIG_EBPF) is enabled by 'configure' script.
30
+To enable eBPF RSS support use './configure --enable-bpf'.
31
+
32
+If steering BPF is not set for kernel's TUN module, the TUN uses automatic selection
33
+of rx virtqueue based on lookup table built according to calculated symmetric hash
34
+of transmitted packets.
35
+If steering BPF is set for TUN the BPF code calculates the hash of packet header and
36
+returns the virtqueue number to place the packet to.
37
+
38
+Simplified decision formula:
39
+
40
+.. code:: C
41
+
42
+ queue_index = indirection_table[hash(<packet data>)%<indirection_table size>]
43
+
44
+
45
+The hash can/should be calculated only for some packets, not for all of them.
46
+
47
+Note: currently, eBPF RSS does not support hash reporting.
48
+
49
+eBPF RSS is turned on by different combinations of vhost-net, virtio-net and tap configurations:
50
+
51
+- eBPF is used:
52
+
53
+ tap,vhost=off & virtio-net-pci,rss=on,hash=off
54
+
55
+- eBPF is used:
56
+
57
+ tap,vhost=on & virtio-net-pci,rss=on,hash=off
58
+
59
+- 'in-qemu' RSS is used:
60
+
61
+ tap,vhost=off & virtio-net-pci,rss=on,hash=on
62
+
63
+- eBPF is used, hash population feature is not reported to the guest:
64
+
65
+ tap,vhost=on & virtio-net-pci,rss=on,hash=on
66
+
67
+If CONFIG_EBPF is not set then only 'in-qemu' RSS is supported.
68
+Also 'in-qemu' RSS, as a fallback, is used if the eBPF program failed to load or set to TUN.
69
+
70
+RSS eBPF program
71
+----------------
72
+
73
+The RSS program is located in ebpf/rss.bpf.skeleton.h, which is generated by bpftool.
74
+So the program is part of the qemu binary.
75
+Initially, the eBPF program was compiled by clang and its source code is located at tools/ebpf/rss.bpf.c.
76
+Prerequisites to recompile the eBPF program (regenerate ebpf/rss.bpf.skeleton.h):
77
+
78
+ llvm, clang, kernel source tree, bpftool
79
+ Adjust Makefile.ebpf to reflect the location of the kernel source tree
80
+
81
+ $ cd tools/ebpf
82
+ $ make -f Makefile.ebpf
83
+
84
+The current eBPF RSS implementation uses 'bounded loops' with 'backward jump instructions', which are present in recent kernels.
85
+Overall eBPF RSS works on kernels 5.8+.
86
+
87
+eBPF RSS implementation
88
+-----------------------
89
+
90
+eBPF RSS loading functionality is located in ebpf/ebpf_rss.c and ebpf/ebpf_rss.h.
91
+
92
+The `struct EBPFRSSContext` structure holds a pointer to the libbpf context and 4 file descriptors:
93
+
94
+- ctx - pointer of the libbpf context.
95
+- program_fd - file descriptor of the eBPF RSS program.
96
+- map_configuration - file descriptor of the 'configuration' map. This map contains one element of 'struct EBPFRSSConfig'. This configuration determines eBPF program behavior.
97
+- map_toeplitz_key - file descriptor of the 'Toeplitz key' map. One element of the 40byte key prepared for the hashing algorithm.
98
+- map_indirections_table - 128 elements of queue indexes.
99
+
100
+`struct EBPFRSSConfig` fields:
101
+
102
+- redirect - "boolean" value, should the hash be calculated, on false - `default_queue` would be used as the final decision.
103
+- populate_hash - for now, not used. eBPF RSS doesn't support hash reporting.
104
+- hash_types - binary mask of different hash types. See `VIRTIO_NET_RSS_HASH_TYPE_*` defines. If the hash should not be calculated for a packet, `default_queue` would be used.
105
+- indirections_len - length of the indirections table, maximum 128.
106
+- default_queue - the queue index that is used for packets that shouldn't be hashed. For some packets, the hash can't be calculated (e.g. ARP).
107
+
108
+Functions:
109
+
110
+- `ebpf_rss_init()` - sets ctx to NULL, which indicates that EBPFRSSContext is not loaded.
111
+- `ebpf_rss_load()` - creates 3 maps and loads eBPF program from the rss.bpf.skeleton.h. Returns 'true' on success. After that, program_fd can be used to set steering for TAP.
112
+- `ebpf_rss_set_all()` - sets values for eBPF maps. `indirections_table` length is in EBPFRSSConfig. `toeplitz_key` is VIRTIO_NET_RSS_MAX_KEY_SIZE aka 40 bytes array.
113
+- `ebpf_rss_unload()` - close all file descriptors and set ctx to NULL.
114
+
115
+Simplified eBPF RSS workflow:
116
+
117
+.. code:: C
118
+
119
+ struct EBPFRSSConfig config;
120
+ config.redirect = 1;
121
+ config.hash_types = VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4;
122
+ config.indirections_len = VIRTIO_NET_RSS_MAX_TABLE_LEN;
123
+ config.default_queue = 0;
124
+
125
+ uint16_t table[VIRTIO_NET_RSS_MAX_TABLE_LEN] = {...};
126
+ uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {...};
127
+
128
+ struct EBPFRSSContext ctx;
129
+ ebpf_rss_init(&ctx);
130
+ ebpf_rss_load(&ctx);
131
+ ebpf_rss_set_all(&ctx, &config, table, key);
132
+ if (net_client->info->set_steering_ebpf != NULL) {
133
+ net_client->info->set_steering_ebpf(net_client, ctx.program_fd);
134
+ }
135
+ ...
136
+ ebpf_rss_unload(&ctx);
137
+
138
+
139
+NetClientState SetSteeringEBPF()
140
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
141
+
142
+For now, the `set_steering_ebpf()` method is supported by Linux TAP NetClientState. The method requires an eBPF program file descriptor as an argument.
143
diff --git a/docs/devel/index.rst b/docs/devel/index.rst
14
index XXXXXXX..XXXXXXX 100644
144
index XXXXXXX..XXXXXXX 100644
15
--- a/net/net.c
145
--- a/docs/devel/index.rst
16
+++ b/net/net.c
146
+++ b/docs/devel/index.rst
17
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
147
@@ -XXX,XX +XXX,XX @@ Contents:
18
void *opaque)
148
qom
19
{
149
block-coroutine-wrapper
20
NetClientState *nc = opaque;
150
multi-process
21
+ size_t size = iov_size(iov, iovcnt);
151
+ ebpf_rss
22
int ret;
23
24
+ if (size > INT_MAX) {
25
+ return size;
26
+ }
27
+
28
if (nc->link_down) {
29
- return iov_size(iov, iovcnt);
30
+ return size;
31
}
32
33
if (nc->receive_disabled) {
34
--
152
--
35
2.17.1
153
2.7.4
36
154
37
155
diff view generated by jsdifflib
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
COLO thread may sleep at qemu_sem_wait(&s->colo_checkpoint_sem),
3
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
4
when failover work begins. It's better to wake it up to speed up
4
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
5
the process.
6
7
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
8
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
6
---
11
migration/colo.c | 8 ++++++++
7
MAINTAINERS | 8 ++++++++
12
1 file changed, 8 insertions(+)
8
1 file changed, 8 insertions(+)
13
9
14
diff --git a/migration/colo.c b/migration/colo.c
10
diff --git a/MAINTAINERS b/MAINTAINERS
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/migration/colo.c
12
--- a/MAINTAINERS
17
+++ b/migration/colo.c
13
+++ b/MAINTAINERS
18
@@ -XXX,XX +XXX,XX @@ static void primary_vm_do_failover(void)
14
@@ -XXX,XX +XXX,XX @@ F: include/hw/remote/proxy-memory-listener.h
19
15
F: hw/remote/iohub.c
20
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
16
F: include/hw/remote/iohub.h
21
MIGRATION_STATUS_COMPLETED);
17
22
+ /*
18
+EBPF:
23
+ * kick COLO thread which might wait at
19
+M: Jason Wang <jasowang@redhat.com>
24
+ * qemu_sem_wait(&s->colo_checkpoint_sem).
20
+R: Andrew Melnychenko <andrew@daynix.com>
25
+ */
21
+R: Yuri Benditovich <yuri.benditovich@daynix.com>
26
+ colo_checkpoint_notify(migrate_get_current());
22
+S: Maintained
27
23
+F: ebpf/*
28
/*
24
+F: tools/ebpf/*
29
* Wake up COLO thread which may blocked in recv() or send(),
25
+
30
@@ -XXX,XX +XXX,XX @@ static void colo_process_checkpoint(MigrationState *s)
26
Build and test automation
31
27
-------------------------
32
qemu_sem_wait(&s->colo_checkpoint_sem);
28
Build and test automation, general continuous integration
33
34
+ if (s->state != MIGRATION_STATUS_COLO) {
35
+ goto out;
36
+ }
37
ret = colo_do_checkpoint_transaction(s, bioc, fb);
38
if (ret < 0) {
39
goto out;
40
--
29
--
41
2.17.1
30
2.7.4
42
31
43
32
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <chen.zhang@intel.com>
2
1
3
This diagram helps users better understand COLO.
4
Suggested by Markus Armbruster.
5
6
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
7
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
docs/COLO-FT.txt | 34 ++++++++++++++++++++++++++++++++++
11
1 file changed, 34 insertions(+)
12
13
diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt
14
index XXXXXXX..XXXXXXX 100644
15
--- a/docs/COLO-FT.txt
16
+++ b/docs/COLO-FT.txt
17
@@ -XXX,XX +XXX,XX @@ Note:
18
HeartBeat has not been implemented yet, so you need to trigger failover process
19
by using 'x-colo-lost-heartbeat' command.
20
21
+== COLO operation status ==
22
+
23
++-----------------+
24
+| |
25
+| Start COLO |
26
+| |
27
++--------+--------+
28
+ |
29
+ | Main qmp command:
30
+ | migrate-set-capabilities with x-colo
31
+ | migrate
32
+ |
33
+ v
34
++--------+--------+
35
+| |
36
+| COLO running |
37
+| |
38
++--------+--------+
39
+ |
40
+ | Main qmp command:
41
+ | x-colo-lost-heartbeat
42
+ | or
43
+ | some error happened
44
+ v
45
++--------+--------+
46
+| | send qmp event:
47
+| COLO failover | COLO_EXIT
48
+| |
49
++-----------------+
50
+
51
+COLO uses qmp commands to switch and report operation status.
52
+The diagram just shows the main qmp command, you can get the detail
53
+in test procedure.
54
+
55
== Test procedure ==
56
1. Startup qemu
57
Primary:
58
--
59
2.17.1
60
61
diff view generated by jsdifflib
Deleted patch
1
From: liujunjie <liujunjie23@huawei.com>
2
1
3
Before, we did not clear callbacks like handle_output when deleting
4
the virtqueue, which may result in a segmentation fault.
5
The scene is as follows:
6
1. Start a vm with multiqueue vhost-net,
7
2. then we write VIRTIO_PCI_GUEST_FEATURES in PCI configuration to
8
trigger multiqueue disable in this vm which will delete the virtqueue.
9
In this step, the tx_bh is deleted but the callback virtio_net_handle_tx_bh
10
still exist.
11
3. Finally, we write VIRTIO_PCI_QUEUE_NOTIFY in PCI configuration to
12
notify the deleted virtqueue. In this way, virtio_net_handle_tx_bh
13
will be called and qemu will be crashed.
14
15
Although the way described above is uncommon, we had better reinforce it.
16
17
CC: qemu-stable@nongnu.org
18
Signed-off-by: liujunjie <liujunjie23@huawei.com>
19
Signed-off-by: Jason Wang <jasowang@redhat.com>
20
---
21
hw/virtio/virtio.c | 2 ++
22
1 file changed, 2 insertions(+)
23
24
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/virtio/virtio.c
27
+++ b/hw/virtio/virtio.c
28
@@ -XXX,XX +XXX,XX @@ void virtio_del_queue(VirtIODevice *vdev, int n)
29
30
vdev->vq[n].vring.num = 0;
31
vdev->vq[n].vring.num_default = 0;
32
+ vdev->vq[n].handle_output = NULL;
33
+ vdev->vq[n].handle_aio_output = NULL;
34
}
35
36
static void virtio_set_isr(VirtIODevice *vdev, int value)
37
--
38
2.17.1
39
40
diff view generated by jsdifflib
Deleted patch
1
In ne2000_receive(), we try to assign size_ to size which converts
2
from size_t to integer. This will cause trouble when size_ is greater than
3
INT_MAX, this will lead a negative value in size and it can then pass
4
the check of size < MIN_BUF_SIZE which may lead to out-of-bounds access
5
for both buf and buf1.
6
1
7
Fixing by converting the type of size to size_t.
8
9
CC: qemu-stable@nongnu.org
10
Reported-by: Daniel Shapira <daniel@twistlock.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/ne2000.c | 4 ++--
15
1 file changed, 2 insertions(+), 2 deletions(-)
16
17
diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/ne2000.c
20
+++ b/hw/net/ne2000.c
21
@@ -XXX,XX +XXX,XX @@ static int ne2000_buffer_full(NE2000State *s)
22
ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
23
{
24
NE2000State *s = qemu_get_nic_opaque(nc);
25
- int size = size_;
26
+ size_t size = size_;
27
uint8_t *p;
28
unsigned int total_len, next, avail, len, index, mcast_idx;
29
uint8_t buf1[60];
30
@@ -XXX,XX +XXX,XX @@ ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
31
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
32
33
#if defined(DEBUG_NE2000)
34
- printf("NE2000: received len=%d\n", size);
35
+ printf("NE2000: received len=%zu\n", size);
36
#endif
37
38
if (s->cmd & E8390_STOP || ne2000_buffer_full(s))
39
--
40
2.17.1
41
42
diff view generated by jsdifflib
Deleted patch
1
In rtl8139_do_receive(), we try to assign size_ to size which converts
2
from size_t to integer. This will cause trouble when size_ is greater than
3
INT_MAX, this will lead a negative value in size and it can then pass
4
the check of size < MIN_BUF_SIZE which may lead to out-of-bounds access
5
for both buf and buf1.
6
1
7
Fixing by converting the type of size to size_t.
8
9
CC: qemu-stable@nongnu.org
10
Reported-by: Daniel Shapira <daniel@twistlock.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/rtl8139.c | 8 ++++----
15
1 file changed, 4 insertions(+), 4 deletions(-)
16
17
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/rtl8139.c
20
+++ b/hw/net/rtl8139.c
21
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
22
RTL8139State *s = qemu_get_nic_opaque(nc);
23
PCIDevice *d = PCI_DEVICE(s);
24
/* size is the length of the buffer passed to the driver */
25
- int size = size_;
26
+ size_t size = size_;
27
const uint8_t *dot1q_buf = NULL;
28
29
uint32_t packet_header = 0;
30
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
31
static const uint8_t broadcast_macaddr[6] =
32
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
33
34
- DPRINTF(">>> received len=%d\n", size);
35
+ DPRINTF(">>> received len=%zu\n", size);
36
37
/* test if board clock is stopped */
38
if (!s->clock_enabled)
39
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
40
41
if (size+4 > rx_space)
42
{
43
- DPRINTF("C+ Rx mode : descriptor %d size %d received %d + 4\n",
44
+ DPRINTF("C+ Rx mode : descriptor %d size %d received %zu + 4\n",
45
descriptor, rx_space, size);
46
47
s->IntrStatus |= RxOverflow;
48
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
49
if (avail != 0 && RX_ALIGN(size + 8) >= avail)
50
{
51
DPRINTF("rx overflow: rx buffer length %d head 0x%04x "
52
- "read 0x%04x === available 0x%04x need 0x%04x\n",
53
+ "read 0x%04x === available 0x%04x need 0x%04zx\n",
54
s->RxBufferSize, s->RxBufAddr, s->RxBufPtr, avail, size + 8);
55
56
s->IntrStatus |= RxOverflow;
57
--
58
2.17.1
59
60
diff view generated by jsdifflib
Deleted patch
1
From: Martin Wilck <mwilck@suse.com>
2
1
3
The e1000 emulation silently discards RX packets if there's
4
insufficient space in the ring buffer. This leads to errors
5
on higher-level protocols in the guest, with no indication
6
about the error cause.
7
8
This patch increments the "Missed Packets Count" (MPC) and
9
"Receive No Buffers Count" (RNBC) HW counters in this case.
10
As the emulation has no FIFO for buffering packets that can't
11
immediately be pushed to the guest, these two registers are
12
practically equivalent (see 10.2.7.4, 10.2.7.33 in
13
https://www.intel.com/content/www/us/en/embedded/products/networking/82574l-gbe-controller-datasheet.html).
14
15
On a Linux guest, the register content will be reflected in
16
the "rx_missed_errors" and "rx_no_buffer_count" stats from
17
"ethtool -S", and in the "missed" stat from "ip -s -s link show",
18
giving at least some hint about the error cause inside the guest.
19
20
If the cause is known, problems like this can often be avoided
21
easily, by increasing the number of RX descriptors in the guest
22
e1000 driver (e.g under Linux, "e1000.RxDescriptors=1024").
23
24
The patch also adds a qemu trace message for this condition.
25
26
Signed-off-by: Martin Wilck <mwilck@suse.com>
27
---
28
hw/net/e1000.c | 16 +++++++++++++---
29
hw/net/trace-events | 3 +++
30
2 files changed, 16 insertions(+), 3 deletions(-)
31
32
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/hw/net/e1000.c
35
+++ b/hw/net/e1000.c
36
@@ -XXX,XX +XXX,XX @@
37
#include "qemu/range.h"
38
39
#include "e1000x_common.h"
40
+#include "trace.h"
41
42
static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
43
44
@@ -XXX,XX +XXX,XX @@ static uint64_t rx_desc_base(E1000State *s)
45
return (bah << 32) + bal;
46
}
47
48
+static void
49
+e1000_receiver_overrun(E1000State *s, size_t size)
50
+{
51
+ trace_e1000_receiver_overrun(size, s->mac_reg[RDH], s->mac_reg[RDT]);
52
+ e1000x_inc_reg_if_not_full(s->mac_reg, RNBC);
53
+ e1000x_inc_reg_if_not_full(s->mac_reg, MPC);
54
+ set_ics(s, 0, E1000_ICS_RXO);
55
+}
56
+
57
static ssize_t
58
e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
59
{
60
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
61
desc_offset = 0;
62
total_size = size + e1000x_fcs_len(s->mac_reg);
63
if (!e1000_has_rxbufs(s, total_size)) {
64
- set_ics(s, 0, E1000_ICS_RXO);
65
- return -1;
66
+ e1000_receiver_overrun(s, total_size);
67
+ return -1;
68
}
69
do {
70
desc_size = total_size - desc_offset;
71
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
72
rdh_start >= s->mac_reg[RDLEN] / sizeof(desc)) {
73
DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
74
rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
75
- set_ics(s, 0, E1000_ICS_RXO);
76
+ e1000_receiver_overrun(s, total_size);
77
return -1;
78
}
79
} while (desc_offset < total_size);
80
diff --git a/hw/net/trace-events b/hw/net/trace-events
81
index XXXXXXX..XXXXXXX 100644
82
--- a/hw/net/trace-events
83
+++ b/hw/net/trace-events
84
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash"
85
net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X"
86
net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes"
87
88
+# hw/net/e1000.c
89
+e1000_receiver_overrun(size_t s, uint32_t rdh, uint32_t rdt) "Receiver overrun: dropped packet of %zu bytes, RDH=%u, RDT=%u"
90
+
91
# hw/net/e1000x_common.c
92
e1000x_rx_can_recv_disabled(bool link_up, bool rx_enabled, bool pci_master) "link_up: %d, rx_enabled %d, pci_master %d"
93
e1000x_vlan_is_vlan_pkt(bool is_vlan_pkt, uint16_t eth_proto, uint16_t vet) "Is VLAN packet: %d, ETH proto: 0x%X, VET: 0x%X"
94
--
95
2.17.1
96
97
diff view generated by jsdifflib