1
The following changes since commit 6632f6ff96f0537fc34cdc00c760656fc62e23c5:
1
The following changes since commit 7dd9d7e0bd29abf590d1ac235c0a00606ef81153:
2
2
3
Merge remote-tracking branch 'remotes/famz/tags/block-and-testing-pull-request' into staging (2017-07-17 11:46:36 +0100)
3
Merge tag 'pull-ppc-20220831' of https://gitlab.com/danielhb/qemu into staging (2022-09-01 13:53:20 -0400)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to 189ae6bb5ce1f5a322f8691d00fe942ba43dd601:
9
for you to fetch changes up to 36a894aeb64a2e02871016da1c37d4a4ca109182:
10
10
11
virtio-net: fix offload ctrl endian (2017-07-17 20:13:56 +0800)
11
net: tulip: Restrict DMA engine to memories (2022-09-02 10:22:39 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
- fix virtio-net ctrl offload endian
15
----------------------------------------------------------------
16
- vnet header support for various COLO netfilters and compare thread
16
Eugenio Pérez (19):
17
vdpa: Skip the maps not in the iova tree
18
vdpa: do not save failed dma maps in SVQ iova tree
19
util: accept iova_tree_remove_parameter by value
20
vdpa: Remove SVQ vring from iova_tree at shutdown
21
vdpa: Make SVQ vring unmapping return void
22
vhost: Always store new kick fd on vhost_svq_set_svq_kick_fd
23
vdpa: Use ring hwaddr at vhost_vdpa_svq_unmap_ring
24
vhost: stop transfer elem ownership in vhost_handle_guest_kick
25
vhost: use SVQ element ndescs instead of opaque data for desc validation
26
vhost: Delete useless read memory barrier
27
vhost: Do not depend on !NULL VirtQueueElement on vhost_svq_flush
28
vhost_net: Add NetClientInfo start callback
29
vhost_net: Add NetClientInfo stop callback
30
vdpa: add net_vhost_vdpa_cvq_info NetClientInfo
31
vdpa: Move command buffers map to start of net device
32
vdpa: extract vhost_vdpa_net_cvq_add from vhost_vdpa_net_handle_ctrl_avail
33
vhost_net: add NetClientState->load() callback
34
vdpa: Add virtio-net mac address via CVQ at start
35
vdpa: Delete CVQ migration blocker
17
36
18
----------------------------------------------------------------
37
Zhang Chen (1):
19
Jason Wang (1):
38
net/colo.c: Fix the pointer issue reported by Coverity.
20
virtio-net: fix offload ctrl endian
21
39
22
Michal Privoznik (1):
40
Zheyu Ma (1):
23
virtion-net: Prefer is_power_of_2()
41
net: tulip: Restrict DMA engine to memories
24
42
25
Zhang Chen (12):
43
hw/i386/intel_iommu.c | 6 +-
26
net: Add vnet_hdr_len arguments in NetClientState
44
hw/net/tulip.c | 4 +-
27
net/net.c: Add vnet_hdr support in SocketReadState
45
hw/net/vhost_net.c | 17 +++
28
net/filter-mirror.c: Introduce parameter for filter_send()
46
hw/virtio/vhost-iova-tree.c | 2 +-
29
net/filter-mirror.c: Make filter mirror support vnet support.
47
hw/virtio/vhost-iova-tree.h | 2 +-
30
net/filter-mirror.c: Add new option to enable vnet support for filter-redirector
48
hw/virtio/vhost-shadow-virtqueue.c | 31 +++--
31
net/colo.c: Make vnet_hdr_len as packet property
49
hw/virtio/vhost-vdpa.c | 90 +++++++--------
32
net/colo-compare.c: Introduce parameter for compare_chr_send()
50
include/hw/virtio/vhost-vdpa.h | 1 -
33
net/colo-compare.c: Make colo-compare support vnet_hdr_len
51
include/net/net.h | 6 +
34
net/colo.c: Add vnet packet parse feature in colo-proxy
52
include/qemu/iova-tree.h | 2 +-
35
net/colo-compare.c: Add vnet packet's tcp/udp/icmp compare
53
net/colo.c | 25 ++--
36
net/filter-rewriter.c: Make filter-rewriter support vnet_hdr_len
54
net/colo.h | 1 +
37
docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header
55
net/trace-events | 2 +-
38
56
net/vhost-vdpa.c | 228 +++++++++++++++++++++++--------------
39
docs/colo-proxy.txt | 26 ++++++++++++++++
57
util/iova-tree.c | 4 +-
40
hw/net/virtio-net.c | 4 ++-
58
15 files changed, 248 insertions(+), 173 deletions(-)
41
include/net/net.h | 10 ++++--
42
net/colo-compare.c | 84 ++++++++++++++++++++++++++++++++++++++++++---------
43
net/colo.c | 9 +++---
44
net/colo.h | 4 ++-
45
net/filter-mirror.c | 75 +++++++++++++++++++++++++++++++++++++++++----
46
net/filter-rewriter.c | 37 ++++++++++++++++++++++-
47
net/net.c | 37 ++++++++++++++++++++---
48
net/socket.c | 8 ++---
49
qemu-options.hx | 19 ++++++------
50
11 files changed, 265 insertions(+), 48 deletions(-)
51
59
52
60
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
Next patch will skip the registering of dma maps that the vdpa device
4
rejects in the iova tree. We need to consider that here or we cause a
5
SIGSEGV accessing result.
6
7
Reported-by: Lei Yang <leiyang@redhat.com>
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Acked-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
hw/virtio/vhost-vdpa.c | 4 ++++
13
1 file changed, 4 insertions(+)
14
15
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/virtio/vhost-vdpa.c
18
+++ b/hw/virtio/vhost-vdpa.c
19
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
20
};
21
22
result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
23
+ if (!result) {
24
+ /* The memory listener map wasn't mapped */
25
+ return;
26
+ }
27
iova = result->iova;
28
vhost_iova_tree_remove(v->iova_tree, result);
29
}
30
--
31
2.7.4
32
33
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
If a map fails for whatever reason, it must not be saved in the tree.
4
Otherwise, qemu will try to unmap it in cleanup, leading to more errors.
5
6
Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ")
7
Reported-by: Lei Yang <leiyang@redhat.com>
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Acked-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
hw/virtio/vhost-vdpa.c | 20 +++++++++++++-------
13
1 file changed, 13 insertions(+), 7 deletions(-)
14
15
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/virtio/vhost-vdpa.c
18
+++ b/hw/virtio/vhost-vdpa.c
19
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_commit(MemoryListener *listener)
20
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
21
MemoryRegionSection *section)
22
{
23
+ DMAMap mem_region = {};
24
struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
25
hwaddr iova;
26
Int128 llend, llsize;
27
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
28
29
llsize = int128_sub(llend, int128_make64(iova));
30
if (v->shadow_vqs_enabled) {
31
- DMAMap mem_region = {
32
- .translated_addr = (hwaddr)(uintptr_t)vaddr,
33
- .size = int128_get64(llsize) - 1,
34
- .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
35
- };
36
+ int r;
37
38
- int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
39
+ mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr,
40
+ mem_region.size = int128_get64(llsize) - 1,
41
+ mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly),
42
+
43
+ r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
44
if (unlikely(r != IOVA_OK)) {
45
error_report("Can't allocate a mapping (%d)", r);
46
goto fail;
47
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
48
vaddr, section->readonly);
49
if (ret) {
50
error_report("vhost vdpa map fail!");
51
- goto fail;
52
+ goto fail_map;
53
}
54
55
return;
56
57
+fail_map:
58
+ if (v->shadow_vqs_enabled) {
59
+ vhost_iova_tree_remove(v->iova_tree, &mem_region);
60
+ }
61
+
62
fail:
63
/*
64
* On the initfn path, store the first error in the container so we
65
--
66
2.7.4
67
68
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
It's convenient to call iova_tree_remove with a map returned from
4
iova_tree_find or iova_tree_find_iova. With the current code this is not
5
possible, since we will free it, and then we will try to search for it
6
again.
7
8
Fix it by accepting the map by value, forcing a copy of the
9
argument. Not applying a fixes tag, since there is no use like that at
10
the moment.
11
12
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
hw/i386/intel_iommu.c | 6 +++---
16
hw/virtio/vhost-iova-tree.c | 2 +-
17
hw/virtio/vhost-iova-tree.h | 2 +-
18
hw/virtio/vhost-vdpa.c | 6 +++---
19
include/qemu/iova-tree.h | 2 +-
20
net/vhost-vdpa.c | 4 ++--
21
util/iova-tree.c | 4 ++--
22
7 files changed, 13 insertions(+), 13 deletions(-)
23
24
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/i386/intel_iommu.c
27
+++ b/hw/i386/intel_iommu.c
28
@@ -XXX,XX +XXX,XX @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info)
29
return ret;
30
}
31
/* Drop any existing mapping */
32
- iova_tree_remove(as->iova_tree, &target);
33
+ iova_tree_remove(as->iova_tree, target);
34
/* Recover the correct type */
35
event->type = IOMMU_NOTIFIER_MAP;
36
entry->perm = cache_perm;
37
@@ -XXX,XX +XXX,XX @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info)
38
trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask);
39
return 0;
40
}
41
- iova_tree_remove(as->iova_tree, &target);
42
+ iova_tree_remove(as->iova_tree, target);
43
}
44
45
trace_vtd_page_walk_one(info->domain_id, entry->iova,
46
@@ -XXX,XX +XXX,XX @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
47
48
map.iova = n->start;
49
map.size = size;
50
- iova_tree_remove(as->iova_tree, &map);
51
+ iova_tree_remove(as->iova_tree, map);
52
}
53
54
static void vtd_address_space_unmap_all(IntelIOMMUState *s)
55
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/hw/virtio/vhost-iova-tree.c
58
+++ b/hw/virtio/vhost-iova-tree.c
59
@@ -XXX,XX +XXX,XX @@ int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
60
* @iova_tree: The vhost iova tree
61
* @map: The map to remove
62
*/
63
-void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
64
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map)
65
{
66
iova_tree_remove(iova_tree->iova_taddr_map, map);
67
}
68
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
69
index XXXXXXX..XXXXXXX 100644
70
--- a/hw/virtio/vhost-iova-tree.h
71
+++ b/hw/virtio/vhost-iova-tree.h
72
@@ -XXX,XX +XXX,XX @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
73
const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
74
const DMAMap *map);
75
int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
76
-void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
77
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map);
78
79
#endif
80
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
81
index XXXXXXX..XXXXXXX 100644
82
--- a/hw/virtio/vhost-vdpa.c
83
+++ b/hw/virtio/vhost-vdpa.c
84
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
85
86
fail_map:
87
if (v->shadow_vqs_enabled) {
88
- vhost_iova_tree_remove(v->iova_tree, &mem_region);
89
+ vhost_iova_tree_remove(v->iova_tree, mem_region);
90
}
91
92
fail:
93
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
94
return;
95
}
96
iova = result->iova;
97
- vhost_iova_tree_remove(v->iova_tree, result);
98
+ vhost_iova_tree_remove(v->iova_tree, *result);
99
}
100
vhost_vdpa_iotlb_batch_begin_once(v);
101
ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
102
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
103
needle->perm == IOMMU_RO);
104
if (unlikely(r != 0)) {
105
error_setg_errno(errp, -r, "Cannot map region to device");
106
- vhost_iova_tree_remove(v->iova_tree, needle);
107
+ vhost_iova_tree_remove(v->iova_tree, *needle);
108
}
109
110
return r == 0;
111
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
112
index XXXXXXX..XXXXXXX 100644
113
--- a/include/qemu/iova-tree.h
114
+++ b/include/qemu/iova-tree.h
115
@@ -XXX,XX +XXX,XX @@ int iova_tree_insert(IOVATree *tree, const DMAMap *map);
116
* all the mappings that are included in the provided range will be
117
* removed from the tree. Here map->translated_addr is meaningless.
118
*/
119
-void iova_tree_remove(IOVATree *tree, const DMAMap *map);
120
+void iova_tree_remove(IOVATree *tree, DMAMap map);
121
122
/**
123
* iova_tree_find:
124
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
125
index XXXXXXX..XXXXXXX 100644
126
--- a/net/vhost-vdpa.c
127
+++ b/net/vhost-vdpa.c
128
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
129
error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
130
}
131
132
- vhost_iova_tree_remove(tree, map);
133
+ vhost_iova_tree_remove(tree, *map);
134
}
135
136
static size_t vhost_vdpa_net_cvq_cmd_len(void)
137
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
138
return true;
139
140
dma_map_err:
141
- vhost_iova_tree_remove(v->iova_tree, &map);
142
+ vhost_iova_tree_remove(v->iova_tree, map);
143
return false;
144
}
145
146
diff --git a/util/iova-tree.c b/util/iova-tree.c
147
index XXXXXXX..XXXXXXX 100644
148
--- a/util/iova-tree.c
149
+++ b/util/iova-tree.c
150
@@ -XXX,XX +XXX,XX @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator)
151
g_tree_foreach(tree->tree, iova_tree_traverse, iterator);
152
}
153
154
-void iova_tree_remove(IOVATree *tree, const DMAMap *map)
155
+void iova_tree_remove(IOVATree *tree, DMAMap map)
156
{
157
const DMAMap *overlap;
158
159
- while ((overlap = iova_tree_find(tree, map))) {
160
+ while ((overlap = iova_tree_find(tree, &map))) {
161
g_tree_remove(tree->tree, overlap);
162
}
163
}
164
--
165
2.7.4
166
167
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
Although the device will be reset before usage, the right thing to do is
4
to clean it.
5
6
Reported-by: Lei Yang <leiyang@redhat.com>
7
Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ")
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/virtio/vhost-vdpa.c | 6 ++++++
12
1 file changed, 6 insertions(+)
13
14
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/virtio/vhost-vdpa.c
17
+++ b/hw/virtio/vhost-vdpa.c
18
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
19
20
size = ROUND_UP(result->size, qemu_real_host_page_size());
21
r = vhost_vdpa_dma_unmap(v, result->iova, size);
22
+ if (unlikely(r < 0)) {
23
+ error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r);
24
+ return false;
25
+ }
26
+
27
+ vhost_iova_tree_remove(v->iova_tree, *result);
28
return r == 0;
29
}
30
31
--
32
2.7.4
33
34
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We add the vnet_hdr_support option for filter-redirector; it is disabled by default.
3
Nothing actually reads the return value, but an error in cleaning some
4
If you use the virtio-net-pci net driver or another driver that needs vnet_hdr, please enable it.
4
entries could cause device stop to abort, making a restart impossible.
5
Because colo-compare or other modules need the vnet_hdr_len to parse
5
Better to explicitly ignore the return value.
6
packets, we add this new option to send the length to them.
7
You can use it for example:
8
-object filter-redirector,id=r0,netdev=hn0,queue=tx,outdev=red0,vnet_hdr_support
9
6
10
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Reported-by: Lei Yang <leiyang@redhat.com>
8
Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ")
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Acked-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
12
---
13
net/filter-mirror.c | 23 +++++++++++++++++++++++
13
hw/virtio/vhost-vdpa.c | 32 ++++++++++----------------------
14
qemu-options.hx | 6 +++---
14
1 file changed, 10 insertions(+), 22 deletions(-)
15
2 files changed, 26 insertions(+), 3 deletions(-)
16
15
17
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
16
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
18
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
19
--- a/net/filter-mirror.c
18
--- a/hw/virtio/vhost-vdpa.c
20
+++ b/net/filter-mirror.c
19
+++ b/hw/virtio/vhost-vdpa.c
21
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
20
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
22
s->outdev = g_strdup(value);
21
/**
22
* Unmap a SVQ area in the device
23
*/
24
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
25
+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
26
const DMAMap *needle)
27
{
28
const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
29
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
30
31
if (unlikely(!result)) {
32
error_report("Unable to find SVQ address to unmap");
33
- return false;
34
+ return;
35
}
36
37
size = ROUND_UP(result->size, qemu_real_host_page_size());
38
r = vhost_vdpa_dma_unmap(v, result->iova, size);
39
if (unlikely(r < 0)) {
40
error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r);
41
- return false;
42
+ return;
43
}
44
45
vhost_iova_tree_remove(v->iova_tree, *result);
46
- return r == 0;
23
}
47
}
24
48
25
+static bool filter_redirector_get_vnet_hdr(Object *obj, Error **errp)
49
-static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
26
+{
50
+static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
27
+ MirrorState *s = FILTER_REDIRECTOR(obj);
51
const VhostShadowVirtqueue *svq)
28
+
29
+ return s->vnet_hdr;
30
+}
31
+
32
+static void filter_redirector_set_vnet_hdr(Object *obj,
33
+ bool value,
34
+ Error **errp)
35
+{
36
+ MirrorState *s = FILTER_REDIRECTOR(obj);
37
+
38
+ s->vnet_hdr = value;
39
+}
40
+
41
static void filter_mirror_init(Object *obj)
42
{
52
{
43
MirrorState *s = FILTER_MIRROR(obj);
53
DMAMap needle = {};
44
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_init(Object *obj)
54
struct vhost_vdpa *v = dev->opaque;
45
55
struct vhost_vring_addr svq_addr;
46
static void filter_redirector_init(Object *obj)
56
- bool ok;
57
58
vhost_svq_get_vring_addr(svq, &svq_addr);
59
60
needle.translated_addr = svq_addr.desc_user_addr;
61
- ok = vhost_vdpa_svq_unmap_ring(v, &needle);
62
- if (unlikely(!ok)) {
63
- return false;
64
- }
65
+ vhost_vdpa_svq_unmap_ring(v, &needle);
66
67
needle.translated_addr = svq_addr.used_user_addr;
68
- return vhost_vdpa_svq_unmap_ring(v, &needle);
69
+ vhost_vdpa_svq_unmap_ring(v, &needle);
70
}
71
72
/**
73
@@ -XXX,XX +XXX,XX @@ err:
74
return false;
75
}
76
77
-static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
78
+static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
47
{
79
{
48
+ MirrorState *s = FILTER_REDIRECTOR(obj);
80
struct vhost_vdpa *v = dev->opaque;
49
+
81
50
object_property_add_str(obj, "indev", filter_redirector_get_indev,
82
if (!v->shadow_vqs) {
51
filter_redirector_set_indev, NULL);
83
- return true;
52
object_property_add_str(obj, "outdev", filter_redirector_get_outdev,
84
+ return;
53
filter_redirector_set_outdev, NULL);
85
}
54
+
86
55
+ s->vnet_hdr = false;
87
for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
56
+ object_property_add_bool(obj, "vnet_hdr_support",
88
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
57
+ filter_redirector_get_vnet_hdr,
89
- bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
58
+ filter_redirector_set_vnet_hdr, NULL);
90
- if (unlikely(!ok)) {
91
- return false;
92
- }
93
+ vhost_vdpa_svq_unmap_rings(dev, svq);
94
}
95
96
if (v->migration_blocker) {
97
migrate_del_blocker(v->migration_blocker);
98
}
99
- return true;
59
}
100
}
60
101
61
static void filter_mirror_fini(Object *obj)
102
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
62
diff --git a/qemu-options.hx b/qemu-options.hx
103
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
63
index XXXXXXX..XXXXXXX 100644
104
}
64
--- a/qemu-options.hx
105
vhost_vdpa_set_vring_ready(dev);
65
+++ b/qemu-options.hx
106
} else {
66
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
107
- ok = vhost_vdpa_svqs_stop(dev);
67
108
- if (unlikely(!ok)) {
68
filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
109
- return -1;
69
110
- }
70
-@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
111
+ vhost_vdpa_svqs_stop(dev);
71
-outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
112
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
72
+@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
113
}
73
114
74
filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev
75
-@var{chardevid},and redirect indev's packet to filter.
76
+@var{chardevid},and redirect indev's packet to filter.if it has the vnet_hdr_support flag,
77
+filter-redirector will redirect packet with vnet_hdr_len.
78
Create a filter-redirector we need to differ outdev id from indev id, id can not
79
be the same. we can just use indev or outdev, but at least one of indev or outdev
80
need to be specified.
81
--
115
--
82
2.7.4
116
2.7.4
83
117
84
118
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
We can unbind a file descriptor twice if we call
4
vhost_svq_set_svq_kick_fd twice because of this. Since it comes from vhost and
5
not from SVQ, that file descriptor could be a different thing than the
6
guest's vhost notifier.
7
8
Likewise, the same can happen if a guest starts and stops the device
9
multiple times.
10
11
Reported-by: Lei Yang <leiyang@redhat.com>
12
Fixes: dff4426fa6 ("vhost: Add Shadow VirtQueue kick forwarding capabilities")
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Acked-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
---
17
hw/virtio/vhost-shadow-virtqueue.c | 4 ++--
18
1 file changed, 2 insertions(+), 2 deletions(-)
19
20
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/virtio/vhost-shadow-virtqueue.c
23
+++ b/hw/virtio/vhost-shadow-virtqueue.c
24
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
25
event_notifier_set_handler(svq_kick, NULL);
26
}
27
28
+ event_notifier_init_fd(svq_kick, svq_kick_fd);
29
/*
30
* event_notifier_set_handler already checks for guest's notifications if
31
* they arrive at the new file descriptor in the switch, so there is no
32
* need to explicitly check for them.
33
*/
34
if (poll_start) {
35
- event_notifier_init_fd(svq_kick, svq_kick_fd);
36
event_notifier_set(svq_kick);
37
event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
38
}
39
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
40
*/
41
void vhost_svq_stop(VhostShadowVirtqueue *svq)
42
{
43
- event_notifier_set_handler(&svq->svq_kick, NULL);
44
+ vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND);
45
g_autofree VirtQueueElement *next_avail_elem = NULL;
46
47
if (!svq->vq) {
48
--
49
2.7.4
50
51
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
Reduce code duplication.
4
5
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
6
Acked-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
hw/virtio/vhost-vdpa.c | 17 ++++++++---------
10
1 file changed, 8 insertions(+), 9 deletions(-)
11
12
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/virtio/vhost-vdpa.c
15
+++ b/hw/virtio/vhost-vdpa.c
16
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
17
/**
18
* Unmap a SVQ area in the device
19
*/
20
-static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
21
- const DMAMap *needle)
22
+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr)
23
{
24
- const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
25
+ const DMAMap needle = {
26
+ .translated_addr = addr,
27
+ };
28
+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle);
29
hwaddr size;
30
int r;
31
32
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
33
static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
34
const VhostShadowVirtqueue *svq)
35
{
36
- DMAMap needle = {};
37
struct vhost_vdpa *v = dev->opaque;
38
struct vhost_vring_addr svq_addr;
39
40
vhost_svq_get_vring_addr(svq, &svq_addr);
41
42
- needle.translated_addr = svq_addr.desc_user_addr;
43
- vhost_vdpa_svq_unmap_ring(v, &needle);
44
+ vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr);
45
46
- needle.translated_addr = svq_addr.used_user_addr;
47
- vhost_vdpa_svq_unmap_ring(v, &needle);
48
+ vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr);
49
}
50
51
/**
52
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
53
ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
54
if (unlikely(!ok)) {
55
error_prepend(errp, "Cannot create vq device region: ");
56
- vhost_vdpa_svq_unmap_ring(v, &driver_region);
57
+ vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr);
58
}
59
addr->used_user_addr = device_region.iova;
60
61
--
62
2.7.4
63
64
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
COLO-Proxy only focuses on the packet payload, so we skip the vnet header.
3
It was easier to allow vhost_svq_add to handle the memory. Now that we
4
will allow qemu to add elements to a SVQ without the guest's knowledge,
5
it's better to handle it in the caller.
4
6
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Acked-by: Jason Wang <jasowang@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
10
---
8
net/colo-compare.c | 8 ++++++--
11
hw/virtio/vhost-shadow-virtqueue.c | 10 ++++------
9
1 file changed, 6 insertions(+), 2 deletions(-)
12
1 file changed, 4 insertions(+), 6 deletions(-)
10
13
11
diff --git a/net/colo-compare.c b/net/colo-compare.c
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo-compare.c
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
14
+++ b/net/colo-compare.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
15
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt, Packet *spkt, int offset)
18
@@ -XXX,XX +XXX,XX @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
16
sec_ip_src, sec_ip_dst);
19
/**
20
* Add an element to a SVQ.
21
*
22
- * The caller must check that there is enough slots for the new element. It
23
- * takes ownership of the element: In case of failure not ENOSPC, it is free.
24
- *
25
* Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
26
*/
27
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
28
@@ -XXX,XX +XXX,XX @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
29
30
ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
31
if (unlikely(!ok)) {
32
- g_free(elem);
33
return -EINVAL;
17
}
34
}
18
35
19
+ offset = ppkt->vnet_hdr_len + offset;
36
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
20
+
37
virtio_queue_set_notification(svq->vq, false);
21
if (ppkt->size == spkt->size) {
38
22
- return memcmp(ppkt->data + offset, spkt->data + offset,
39
while (true) {
23
+ return memcmp(ppkt->data + offset,
40
- VirtQueueElement *elem;
24
+ spkt->data + offset,
41
+ g_autofree VirtQueueElement *elem;
25
spkt->size - offset);
42
int r;
26
} else {
43
27
trace_colo_compare_main("Net packet size are not the same");
44
if (svq->next_guest_avail_elem) {
28
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
45
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
29
*/
46
* queue the current guest descriptor and ignore kicks
30
if (ptcp->th_off > 5) {
47
* until some elements are used.
31
ptrdiff_t tcp_offset;
48
*/
32
+
49
- svq->next_guest_avail_elem = elem;
33
tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
50
+ svq->next_guest_avail_elem = g_steal_pointer(&elem);
34
- + (ptcp->th_off * 4);
51
}
35
+ + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
52
36
res = colo_packet_compare_common(ppkt, spkt, tcp_offset);
53
/* VQ is full or broken, just return and ignore kicks */
37
} else if (ptcp->th_sum == stcp->th_sum) {
54
return;
38
res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN);
55
}
56
+ /* elem belongs to SVQ or external caller now */
57
+ elem = NULL;
58
}
59
60
virtio_queue_set_notification(svq->vq, true);
39
--
61
--
40
2.7.4
62
2.7.4
41
63
42
64
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
This patch changes the filter_send() parameter from CharBackend to MirrorState,
3
Since we're going to allow SVQ to add elements without the guest's
4
so we can get more information like vnet_hdr (we use it to support packets with vnet_header).
4
knowledge and without its own VirtQueueElement, it's easier to check if
5
an element is a valid head by checking a different thing than the
6
VirtQueueElement.
5
7
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Acked-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
11
---
9
net/filter-mirror.c | 10 +++++-----
12
hw/virtio/vhost-shadow-virtqueue.c | 3 ++-
10
1 file changed, 5 insertions(+), 5 deletions(-)
13
1 file changed, 2 insertions(+), 1 deletion(-)
11
14
12
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
15
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/net/filter-mirror.c
17
--- a/hw/virtio/vhost-shadow-virtqueue.c
15
+++ b/net/filter-mirror.c
18
+++ b/hw/virtio/vhost-shadow-virtqueue.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
19
@@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
17
SocketReadState rs;
20
return NULL;
18
} MirrorState;
19
20
-static int filter_send(CharBackend *chr_out,
21
+static int filter_send(MirrorState *s,
22
const struct iovec *iov,
23
int iovcnt)
24
{
25
@@ -XXX,XX +XXX,XX @@ static int filter_send(CharBackend *chr_out,
26
}
21
}
27
22
28
len = htonl(size);
23
- if (unlikely(!svq->desc_state[used_elem.id].elem)) {
29
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)&len, sizeof(len));
24
+ if (unlikely(!svq->desc_state[used_elem.id].ndescs)) {
30
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
25
qemu_log_mask(LOG_GUEST_ERROR,
31
if (ret != sizeof(len)) {
26
"Device %s says index %u is used, but it was not available",
32
goto err;
27
svq->vdev->name, used_elem.id);
28
@@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
33
}
29
}
34
30
35
buf = g_malloc(size);
31
num = svq->desc_state[used_elem.id].ndescs;
36
iov_to_buf(iov, iovcnt, 0, buf, size);
32
+ svq->desc_state[used_elem.id].ndescs = 0;
37
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)buf, size);
33
last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
38
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
34
svq->desc_next[last_used_chain] = svq->free_head;
39
g_free(buf);
35
svq->free_head = used_elem.id;
40
if (ret != size) {
41
goto err;
42
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
43
MirrorState *s = FILTER_MIRROR(nf);
44
int ret;
45
46
- ret = filter_send(&s->chr_out, iov, iovcnt);
47
+ ret = filter_send(s, iov, iovcnt);
48
if (ret) {
49
error_report("filter mirror send failed(%s)", strerror(-ret));
50
}
51
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_redirector_receive_iov(NetFilterState *nf,
52
int ret;
53
54
if (qemu_chr_fe_backend_connected(&s->chr_out)) {
55
- ret = filter_send(&s->chr_out, iov, iovcnt);
56
+ ret = filter_send(s, iov, iovcnt);
57
if (ret) {
58
error_report("filter redirector send failed(%s)", strerror(-ret));
59
}
60
--
36
--
61
2.7.4
37
2.7.4
62
38
63
39
diff view generated by jsdifflib
1
Spec said offloads should be le64, so use virtio_ldq_p() to guarantee
1
From: Eugenio Pérez <eperezma@redhat.com>
2
valid endian.
3
2
4
Fixes: 644c98587d4c ("virtio-net: dynamic network offloads configuration")
3
As discussed in previous series [1], this memory barrier is useless with
5
Cc: qemu-stable@nongnu.org
4
the atomic read of used idx at vhost_svq_more_used. Deleting it.
6
Cc: Dmitry Fleytman <dfleytma@redhat.com>
5
6
[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg02616.html
7
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Acked-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
11
---
9
hw/net/virtio-net.c | 2 ++
12
hw/virtio/vhost-shadow-virtqueue.c | 3 ---
10
1 file changed, 2 insertions(+)
13
1 file changed, 3 deletions(-)
11
14
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
15
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
17
--- a/hw/virtio/vhost-shadow-virtqueue.c
15
+++ b/hw/net/virtio-net.c
18
+++ b/hw/virtio/vhost-shadow-virtqueue.c
16
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
19
@@ -XXX,XX +XXX,XX @@ size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
17
if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
20
if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
18
uint64_t supported_offloads;
21
return 0;
19
20
+ offloads = virtio_ldq_p(vdev, &offloads);
21
+
22
if (!n->has_vnet_hdr) {
23
return VIRTIO_NET_ERR;
24
}
22
}
23
-
24
- /* Make sure we read new used_idx */
25
- smp_rmb();
26
} while (true);
27
}
28
25
--
29
--
26
2.7.4
30
2.7.4
27
31
28
32
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We can use this property to flush and send packets with vnet_hdr_len.
3
Since QEMU will be able to inject new elements on CVQ to restore the
4
state, we need not depend on a VirtQueueElement to know if a new
5
element has been used by the device or not. Instead of checking that, check
6
if there are new elements only using used idx on vhost_svq_flush.
4
7
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Acked-by: Jason Wang <jasowang@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
11
---
8
net/colo-compare.c | 8 ++++++--
12
hw/virtio/vhost-shadow-virtqueue.c | 11 +++++++----
9
net/colo.c | 3 ++-
13
1 file changed, 7 insertions(+), 4 deletions(-)
10
net/colo.h | 4 +++-
11
net/filter-rewriter.c | 2 +-
12
4 files changed, 12 insertions(+), 5 deletions(-)
13
14
14
diff --git a/net/colo-compare.c b/net/colo-compare.c
15
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/net/colo-compare.c
17
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/net/colo-compare.c
18
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode)
19
@@ -XXX,XX +XXX,XX @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
19
Connection *conn;
20
size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
20
21
{
21
if (mode == PRIMARY_IN) {
22
int64_t start_us = g_get_monotonic_time();
22
- pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len);
23
+ uint32_t len;
23
+ pkt = packet_new(s->pri_rs.buf,
24
+
24
+ s->pri_rs.packet_len,
25
do {
25
+ s->pri_rs.vnet_hdr_len);
26
- uint32_t len;
26
} else {
27
- VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
27
- pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len);
28
- if (elem) {
28
+ pkt = packet_new(s->sec_rs.buf,
29
- return len;
29
+ s->sec_rs.packet_len,
30
+ if (vhost_svq_more_used(svq)) {
30
+ s->sec_rs.vnet_hdr_len);
31
+ break;
31
}
32
}
32
33
33
if (parse_packet_early(pkt)) {
34
if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
34
diff --git a/net/colo.c b/net/colo.c
35
return 0;
35
index XXXXXXX..XXXXXXX 100644
36
}
36
--- a/net/colo.c
37
} while (true);
37
+++ b/net/colo.c
38
+
38
@@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque)
39
+ vhost_svq_get_buf(svq, &len);
39
g_slice_free(Connection, conn);
40
+ return len;
40
}
41
}
41
42
42
-Packet *packet_new(const void *data, int size)
43
/**
43
+Packet *packet_new(const void *data, int size, int vnet_hdr_len)
44
{
45
Packet *pkt = g_slice_new(Packet);
46
47
pkt->data = g_memdup(data, size);
48
pkt->size = size;
49
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
50
+ pkt->vnet_hdr_len = vnet_hdr_len;
51
52
return pkt;
53
}
54
diff --git a/net/colo.h b/net/colo.h
55
index XXXXXXX..XXXXXXX 100644
56
--- a/net/colo.h
57
+++ b/net/colo.h
58
@@ -XXX,XX +XXX,XX @@ typedef struct Packet {
59
int size;
60
/* Time of packet creation, in wall clock ms */
61
int64_t creation_ms;
62
+ /* Get vnet_hdr_len from filter */
63
+ uint32_t vnet_hdr_len;
64
} Packet;
65
66
typedef struct ConnectionKey {
67
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
68
ConnectionKey *key,
69
GQueue *conn_list);
70
void connection_hashtable_reset(GHashTable *connection_track_table);
71
-Packet *packet_new(const void *data, int size);
72
+Packet *packet_new(const void *data, int size, int vnet_hdr_len);
73
void packet_destroy(void *opaque, void *user_data);
74
75
#endif /* QEMU_COLO_PROXY_H */
76
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/net/filter-rewriter.c
79
+++ b/net/filter-rewriter.c
80
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
81
char *buf = g_malloc0(size);
82
83
iov_to_buf(iov, iovcnt, 0, buf, size);
84
- pkt = packet_new(buf, size);
85
+ pkt = packet_new(buf, size, 0);
86
g_free(buf);
87
88
/*
89
--
44
--
90
2.7.4
45
2.7.4
91
46
92
47
diff view generated by jsdifflib
1
From: Michal Privoznik <mprivozn@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We have a function that checks if a given number is a power of two.
3
This is used by the backend to perform actions before the device is
4
We should prefer it instead of expanding the check on our own.
4
started.
5
5
6
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
6
In particular, vdpa net uses it to map CVQ buffers to the device, so it
7
can send control commands using them.
8
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Acked-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
12
---
9
hw/net/virtio-net.c | 2 +-
13
hw/net/vhost_net.c | 7 +++++++
10
1 file changed, 1 insertion(+), 1 deletion(-)
14
include/net/net.h | 2 ++
15
2 files changed, 9 insertions(+)
11
16
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
17
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
13
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
19
--- a/hw/net/vhost_net.c
15
+++ b/hw/net/virtio-net.c
20
+++ b/hw/net/vhost_net.c
16
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
21
@@ -XXX,XX +XXX,XX @@ static int vhost_net_start_one(struct vhost_net *net,
17
*/
22
struct vhost_vring_file file = { };
18
if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
23
int r;
19
n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
24
20
- (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
25
+ if (net->nc->info->start) {
21
+ !is_power_of_2(n->net_conf.rx_queue_size)) {
26
+ r = net->nc->info->start(net->nc);
22
error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
27
+ if (r < 0) {
23
"must be a power of 2 between %d and %d.",
28
+ return r;
24
n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
29
+ }
30
+ }
31
+
32
r = vhost_dev_enable_notifiers(&net->dev, dev);
33
if (r < 0) {
34
goto fail_notifiers;
35
diff --git a/include/net/net.h b/include/net/net.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/include/net/net.h
38
+++ b/include/net/net.h
39
@@ -XXX,XX +XXX,XX @@ typedef struct NICConf {
40
41
typedef void (NetPoll)(NetClientState *, bool enable);
42
typedef bool (NetCanReceive)(NetClientState *);
43
+typedef int (NetStart)(NetClientState *);
44
typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t);
45
typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int);
46
typedef void (NetCleanup) (NetClientState *);
47
@@ -XXX,XX +XXX,XX @@ typedef struct NetClientInfo {
48
NetReceive *receive_raw;
49
NetReceiveIOV *receive_iov;
50
NetCanReceive *can_receive;
51
+ NetStart *start;
52
NetCleanup *cleanup;
53
LinkStatusChanged *link_status_changed;
54
QueryRxFilter *query_rx_filter;
25
--
55
--
26
2.7.4
56
2.7.4
27
57
28
58
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We add a flag to decide whether net_fill_rstate() needs to read
3
Used by the backend to perform actions after the device is stopped.
4
the vnet_hdr_len or not.
5
4
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
5
In particular, vdpa net uses it to unmap CVQ buffers from the device,
7
Suggested-by: Jason Wang <jasowang@redhat.com>
6
cleaning up the actions performed in start().
7
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Acked-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
11
---
10
include/net/net.h | 9 +++++++--
12
hw/net/vhost_net.c | 3 +++
11
net/colo-compare.c | 4 ++--
13
include/net/net.h | 2 ++
12
net/filter-mirror.c | 2 +-
14
2 files changed, 5 insertions(+)
13
net/net.c | 36 ++++++++++++++++++++++++++++++++----
14
net/socket.c | 8 ++++----
15
5 files changed, 46 insertions(+), 13 deletions(-)
16
15
16
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/net/vhost_net.c
19
+++ b/hw/net/vhost_net.c
20
@@ -XXX,XX +XXX,XX @@ static void vhost_net_stop_one(struct vhost_net *net,
21
net->nc->info->poll(net->nc, true);
22
}
23
vhost_dev_stop(&net->dev, dev);
24
+ if (net->nc->info->stop) {
25
+ net->nc->info->stop(net->nc);
26
+ }
27
vhost_dev_disable_notifiers(&net->dev, dev);
28
}
29
17
diff --git a/include/net/net.h b/include/net/net.h
30
diff --git a/include/net/net.h b/include/net/net.h
18
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
19
--- a/include/net/net.h
32
--- a/include/net/net.h
20
+++ b/include/net/net.h
33
+++ b/include/net/net.h
21
@@ -XXX,XX +XXX,XX @@ typedef struct NICState {
34
@@ -XXX,XX +XXX,XX @@ typedef struct NICConf {
22
} NICState;
35
typedef void (NetPoll)(NetClientState *, bool enable);
23
36
typedef bool (NetCanReceive)(NetClientState *);
24
struct SocketReadState {
37
typedef int (NetStart)(NetClientState *);
25
- int state; /* 0 = getting length, 1 = getting data */
38
+typedef void (NetStop)(NetClientState *);
26
+ /* 0 = getting length, 1 = getting vnet header length, 2 = getting data */
39
typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t);
27
+ int state;
40
typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int);
28
+ /* This flag decide whether to read the vnet_hdr_len field */
41
typedef void (NetCleanup) (NetClientState *);
29
+ bool vnet_hdr;
42
@@ -XXX,XX +XXX,XX @@ typedef struct NetClientInfo {
30
uint32_t index;
43
NetReceiveIOV *receive_iov;
31
uint32_t packet_len;
44
NetCanReceive *can_receive;
32
+ uint32_t vnet_hdr_len;
45
NetStart *start;
33
uint8_t buf[NET_BUFSIZE];
46
+ NetStop *stop;
34
SocketReadStateFinalize *finalize;
47
NetCleanup *cleanup;
35
};
48
LinkStatusChanged *link_status_changed;
36
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
49
QueryRxFilter *query_rx_filter;
37
void print_net_client(Monitor *mon, NetClientState *nc);
38
void hmp_info_network(Monitor *mon, const QDict *qdict);
39
void net_socket_rs_init(SocketReadState *rs,
40
- SocketReadStateFinalize *finalize);
41
+ SocketReadStateFinalize *finalize,
42
+ bool vnet_hdr);
43
44
/* NIC info */
45
46
diff --git a/net/colo-compare.c b/net/colo-compare.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/net/colo-compare.c
49
+++ b/net/colo-compare.c
50
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
51
return;
52
}
53
54
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize);
55
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize);
56
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
57
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
58
59
g_queue_init(&s->conn_list);
60
61
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/net/filter-mirror.c
64
+++ b/net/filter-mirror.c
65
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
66
}
67
}
68
69
- net_socket_rs_init(&s->rs, redirector_rs_finalize);
70
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
71
72
if (s->indev) {
73
chr = qemu_chr_find(s->indev);
74
diff --git a/net/net.c b/net/net.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/net/net.c
77
+++ b/net/net.c
78
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_net_opts = {
79
};
80
81
void net_socket_rs_init(SocketReadState *rs,
82
- SocketReadStateFinalize *finalize)
83
+ SocketReadStateFinalize *finalize,
84
+ bool vnet_hdr)
85
{
86
rs->state = 0;
87
+ rs->vnet_hdr = vnet_hdr;
88
rs->index = 0;
89
rs->packet_len = 0;
90
+ rs->vnet_hdr_len = 0;
91
memset(rs->buf, 0, sizeof(rs->buf));
92
rs->finalize = finalize;
93
}
94
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
95
unsigned int l;
96
97
while (size > 0) {
98
- /* reassemble a packet from the network */
99
- switch (rs->state) { /* 0 = getting length, 1 = getting data */
100
+ /* Reassemble a packet from the network.
101
+ * 0 = getting length.
102
+ * 1 = getting vnet header length.
103
+ * 2 = getting data.
104
+ */
105
+ switch (rs->state) {
106
case 0:
107
l = 4 - rs->index;
108
if (l > size) {
109
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
110
/* got length */
111
rs->packet_len = ntohl(*(uint32_t *)rs->buf);
112
rs->index = 0;
113
- rs->state = 1;
114
+ if (rs->vnet_hdr) {
115
+ rs->state = 1;
116
+ } else {
117
+ rs->state = 2;
118
+ rs->vnet_hdr_len = 0;
119
+ }
120
}
121
break;
122
case 1:
123
+ l = 4 - rs->index;
124
+ if (l > size) {
125
+ l = size;
126
+ }
127
+ memcpy(rs->buf + rs->index, buf, l);
128
+ buf += l;
129
+ size -= l;
130
+ rs->index += l;
131
+ if (rs->index == 4) {
132
+ /* got vnet header length */
133
+ rs->vnet_hdr_len = ntohl(*(uint32_t *)rs->buf);
134
+ rs->index = 0;
135
+ rs->state = 2;
136
+ }
137
+ break;
138
+ case 2:
139
l = rs->packet_len - rs->index;
140
if (l > size) {
141
l = size;
142
diff --git a/net/socket.c b/net/socket.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/net/socket.c
145
+++ b/net/socket.c
146
@@ -XXX,XX +XXX,XX @@ static void net_socket_send(void *opaque)
147
closesocket(s->fd);
148
149
s->fd = -1;
150
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
151
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
152
s->nc.link_down = true;
153
memset(s->nc.info_str, 0, sizeof(s->nc.info_str));
154
155
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer,
156
s->fd = fd;
157
s->listen_fd = -1;
158
s->send_fn = net_socket_send_dgram;
159
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
160
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
161
net_socket_read_poll(s, true);
162
163
/* mcast: save bound address as dst */
164
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer,
165
166
s->fd = fd;
167
s->listen_fd = -1;
168
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
169
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
170
171
/* Disable Nagle algorithm on TCP sockets to reduce latency */
172
socket_set_nodelay(fd);
173
@@ -XXX,XX +XXX,XX @@ static int net_socket_listen_init(NetClientState *peer,
174
s->fd = -1;
175
s->listen_fd = fd;
176
s->nc.link_down = true;
177
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
178
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
179
180
qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s);
181
return 0;
182
--
50
--
183
2.7.4
51
2.7.4
184
52
185
53
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
3
Next patches will add a new info callback to restore NIC status through
4
CVQ. Since only the CVQ vhost device is needed, create it with a new
5
NetClientInfo.
6
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Acked-by: Jason Wang <jasowang@redhat.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
10
---
6
docs/colo-proxy.txt | 26 ++++++++++++++++++++++++++
11
net/vhost-vdpa.c | 12 +++++++++++-
7
1 file changed, 26 insertions(+)
12
1 file changed, 11 insertions(+), 1 deletion(-)
8
13
9
diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt
14
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
10
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
11
--- a/docs/colo-proxy.txt
16
--- a/net/vhost-vdpa.c
12
+++ b/docs/colo-proxy.txt
17
+++ b/net/vhost-vdpa.c
13
@@ -XXX,XX +XXX,XX @@ Secondary(ip:3.3.3.8):
18
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
14
-chardev socket,id=red1,host=3.3.3.3,port=9004
19
return true;
15
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
20
}
16
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
21
17
+-object filter-rewriter,id=f3,netdev=hn0,queue=all
22
+static NetClientInfo net_vhost_vdpa_cvq_info = {
23
+ .type = NET_CLIENT_DRIVER_VHOST_VDPA,
24
+ .size = sizeof(VhostVDPAState),
25
+ .receive = vhost_vdpa_receive,
26
+ .cleanup = vhost_vdpa_cleanup,
27
+ .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
28
+ .has_ufo = vhost_vdpa_has_ufo,
29
+ .check_peer_type = vhost_vdpa_check_peer_type,
30
+};
18
+
31
+
19
+If you want to use virtio-net-pci or other driver with vnet_header:
32
/**
20
+
33
* Do not forward commands not supported by SVQ. Otherwise, the device could
21
+Primary(ip:3.3.3.3):
34
* accept it and qemu would not know how to update the device model.
22
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
35
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
23
+-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
36
nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
24
+-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
37
name);
25
+-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
38
} else {
26
+-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
39
- nc = qemu_new_net_control_client(&net_vhost_vdpa_info, peer,
27
+-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
40
+ nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
28
+-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
41
device, name);
29
+-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
42
}
30
+-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
43
snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA);
31
+-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out,vnet_hdr_support
32
+-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0,vnet_hdr_support
33
+-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
34
+
35
+Secondary(ip:3.3.3.8):
36
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
37
+-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
38
+-chardev socket,id=red0,host=3.3.3.3,port=9003
39
+-chardev socket,id=red1,host=3.3.3.3,port=9004
40
+-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0,vnet_hdr_support
41
+-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1,vnet_hdr_support
42
+-object filter-rewriter,id=f3,netdev=hn0,queue=all,vnet_hdr_support
43
44
Note:
45
a.COLO-proxy must work with COLO-frame and Block-replication.
46
--
44
--
47
2.7.4
45
2.7.4
48
46
49
47
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We add the vnet_hdr_support option for colo-compare, default is disabled.
3
As this series will reuse them to restore the device state at the end of
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
a migration (or a device start), let's allocate only once at the device
5
You can use it for example:
5
start so we don't duplicate their map and unmap.
6
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
6
7
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
COLO-compare can get vnet header length from filter,
8
Acked-by: Jason Wang <jasowang@redhat.com>
9
Add vnet_hdr_len to struct packet and output packet with
10
the vnet_hdr_len.
11
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
10
---
15
net/colo-compare.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-------
11
net/vhost-vdpa.c | 123 ++++++++++++++++++++++++++-----------------------------
16
qemu-options.hx | 4 ++--
12
1 file changed, 58 insertions(+), 65 deletions(-)
17
2 files changed, 55 insertions(+), 9 deletions(-)
13
18
14
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
20
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
16
--- a/net/vhost-vdpa.c
22
+++ b/net/colo-compare.c
17
+++ b/net/vhost-vdpa.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct CompareState {
18
@@ -XXX,XX +XXX,XX @@ static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
24
CharBackend chr_out;
19
return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
25
SocketReadState pri_rs;
20
}
26
SocketReadState sec_rs;
21
27
+ bool vnet_hdr;
22
-/** Copy and map a guest buffer. */
28
23
-static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
29
/* connection list: the connections belonged to this NIC could be found
24
- const struct iovec *out_data,
30
* in this list.
25
- size_t out_num, size_t data_len, void *buf,
31
@@ -XXX,XX +XXX,XX @@ enum {
26
- size_t *written, bool write)
32
27
+/** Map CVQ buffer. */
33
static int compare_chr_send(CompareState *s,
28
+static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
34
const uint8_t *buf,
29
+ bool write)
35
- uint32_t size);
36
+ uint32_t size,
37
+ uint32_t vnet_hdr_len);
38
39
static gint seq_sorter(Packet *a, Packet *b, gpointer data)
40
{
30
{
41
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
31
DMAMap map = {};
42
}
32
int r;
43
33
44
if (result) {
34
- if (unlikely(!data_len)) {
45
- ret = compare_chr_send(s, pkt->data, pkt->size);
35
- qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
46
+ ret = compare_chr_send(s,
36
- __func__, write ? "in" : "out");
47
+ pkt->data,
37
- return false;
48
+ pkt->size,
38
- }
49
+ pkt->vnet_hdr_len);
39
-
50
if (ret < 0) {
40
- *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
51
error_report("colo_send_primary_packet failed");
41
map.translated_addr = (hwaddr)(uintptr_t)buf;
52
}
42
- map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
53
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
43
+ map.size = size - 1;
54
44
map.perm = write ? IOMMU_RW : IOMMU_RO,
55
static int compare_chr_send(CompareState *s,
45
r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
56
const uint8_t *buf,
46
if (unlikely(r != IOVA_OK)) {
57
- uint32_t size)
47
error_report("Cannot map injected element");
58
+ uint32_t size,
48
- return false;
59
+ uint32_t vnet_hdr_len)
49
+ return r;
50
}
51
52
r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
53
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
54
goto dma_map_err;
55
}
56
57
- return true;
58
+ return 0;
59
60
dma_map_err:
61
vhost_iova_tree_remove(v->iova_tree, map);
62
- return false;
63
+ return r;
64
}
65
66
-/**
67
- * Copy the guest element into a dedicated buffer suitable to be sent to NIC
68
- *
69
- * @iov: [0] is the out buffer, [1] is the in one
70
- */
71
-static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
72
- VirtQueueElement *elem,
73
- struct iovec *iov)
74
+static int vhost_vdpa_net_cvq_start(NetClientState *nc)
60
{
75
{
61
int ret = 0;
76
- size_t in_copied;
62
uint32_t len = htonl(size);
77
- bool ok;
63
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CompareState *s,
78
+ VhostVDPAState *s;
64
goto err;
79
+ int r;
65
}
80
66
81
- iov[0].iov_base = s->cvq_cmd_out_buffer;
67
+ if (s->vnet_hdr) {
82
- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
68
+ /*
83
- vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
69
+ * We send vnet header len make other module(like filter-redirector)
84
- &iov[0].iov_len, false);
70
+ * know how to parse net packet correctly.
85
- if (unlikely(!ok)) {
71
+ */
86
- return false;
72
+ len = htonl(vnet_hdr_len);
87
+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
73
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
88
+
74
+ if (ret != sizeof(len)) {
89
+ s = DO_UPCAST(VhostVDPAState, nc, nc);
75
+ goto err;
90
+ if (!s->vhost_vdpa.shadow_vqs_enabled) {
76
+ }
91
+ return 0;
92
}
93
94
- iov[1].iov_base = s->cvq_cmd_in_buffer;
95
- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
96
- sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
97
- &in_copied, true);
98
- if (unlikely(!ok)) {
99
+ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
100
+ vhost_vdpa_net_cvq_cmd_page_len(), false);
101
+ if (unlikely(r < 0)) {
102
+ return r;
77
+ }
103
+ }
78
+
104
+
79
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
105
+ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer,
80
if (ret != size) {
106
+ vhost_vdpa_net_cvq_cmd_page_len(), true);
81
goto err;
107
+ if (unlikely(r < 0)) {
82
@@ -XXX,XX +XXX,XX @@ static void compare_set_outdev(Object *obj, const char *value, Error **errp)
108
vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
83
s->outdev = g_strdup(value);
109
- return false;
84
}
110
}
85
111
86
+static bool compare_get_vnet_hdr(Object *obj, Error **errp)
112
- iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
113
- return true;
114
+ return r;
115
+}
116
+
117
+static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
87
+{
118
+{
88
+ CompareState *s = COLO_COMPARE(obj);
119
+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
89
+
120
+
90
+ return s->vnet_hdr;
121
+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
91
+}
122
+
92
+
123
+ if (s->vhost_vdpa.shadow_vqs_enabled) {
93
+static void compare_set_vnet_hdr(Object *obj,
124
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
94
+ bool value,
125
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer);
95
+ Error **errp)
126
+ }
96
+{
127
}
97
+ CompareState *s = COLO_COMPARE(obj);
128
98
+
129
static NetClientInfo net_vhost_vdpa_cvq_info = {
99
+ s->vnet_hdr = value;
130
.type = NET_CLIENT_DRIVER_VHOST_VDPA,
100
+}
131
.size = sizeof(VhostVDPAState),
101
+
132
.receive = vhost_vdpa_receive,
102
static void compare_pri_rs_finalize(SocketReadState *pri_rs)
133
+ .start = vhost_vdpa_net_cvq_start,
134
+ .stop = vhost_vdpa_net_cvq_stop,
135
.cleanup = vhost_vdpa_cleanup,
136
.has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
137
.has_ufo = vhost_vdpa_has_ufo,
138
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_vhost_vdpa_cvq_info = {
139
* Do not forward commands not supported by SVQ. Otherwise, the device could
140
* accept it and qemu would not know how to update the device model.
141
*/
142
-static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
143
- size_t out_num)
144
+static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len)
103
{
145
{
104
CompareState *s = container_of(pri_rs, CompareState, pri_rs);
146
struct virtio_net_ctrl_hdr ctrl;
105
147
- size_t n;
106
if (packet_enqueue(s, PRIMARY_IN)) {
148
107
trace_colo_compare_main("primary: unsupported packet in");
149
- n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
108
- compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
150
- if (unlikely(n < sizeof(ctrl))) {
109
+ compare_chr_send(s,
151
+ if (unlikely(len < sizeof(ctrl))) {
110
+ pri_rs->buf,
152
qemu_log_mask(LOG_GUEST_ERROR,
111
+ pri_rs->packet_len,
153
- "%s: invalid legnth of out buffer %zu\n", __func__, n);
112
+ pri_rs->vnet_hdr_len);
154
+ "%s: invalid legnth of out buffer %zu\n", __func__, len);
113
} else {
155
return false;
114
/* compare connection */
156
}
115
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
157
116
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
158
+ memcpy(&ctrl, out_buf, sizeof(ctrl));
117
return;
159
switch (ctrl.class) {
118
}
160
case VIRTIO_NET_CTRL_MAC:
119
161
switch (ctrl.cmd) {
120
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
162
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
121
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
163
VhostVDPAState *s = opaque;
122
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
164
size_t in_len, dev_written;
123
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
165
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
124
166
- /* out and in buffers sent to the device */
125
g_queue_init(&s->conn_list);
167
- struct iovec dev_buffers[2] = {
126
168
- { .iov_base = s->cvq_cmd_out_buffer },
127
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
169
- { .iov_base = s->cvq_cmd_in_buffer },
128
170
+ /* Out buffer sent to both the vdpa device and the device model */
129
while (!g_queue_is_empty(&conn->primary_list)) {
171
+ struct iovec out = {
130
pkt = g_queue_pop_head(&conn->primary_list);
172
+ .iov_base = s->cvq_cmd_out_buffer,
131
- compare_chr_send(s, pkt->data, pkt->size);
173
+ };
132
+ compare_chr_send(s,
174
+ /* In buffer sent to the device */
133
+ pkt->data,
175
+ const struct iovec dev_in = {
134
+ pkt->size,
176
+ .iov_base = s->cvq_cmd_in_buffer,
135
+ pkt->vnet_hdr_len);
177
+ .iov_len = sizeof(virtio_net_ctrl_ack),
136
packet_destroy(pkt, NULL);
178
};
137
}
179
/* in buffer used for device model */
138
while (!g_queue_is_empty(&conn->secondary_list)) {
180
const struct iovec in = {
139
@@ -XXX,XX +XXX,XX @@ static void colo_compare_class_init(ObjectClass *oc, void *data)
181
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
140
182
int r = -EINVAL;
141
static void colo_compare_init(Object *obj)
183
bool ok;
142
{
184
143
+ CompareState *s = COLO_COMPARE(obj);
185
- ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
144
+
186
- if (unlikely(!ok)) {
145
object_property_add_str(obj, "primary_in",
187
- goto out;
146
compare_get_pri_indev, compare_set_pri_indev,
188
- }
147
NULL);
189
-
148
@@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj)
190
- ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
149
object_property_add_str(obj, "outdev",
191
+ out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
150
compare_get_outdev, compare_set_outdev,
192
+ s->cvq_cmd_out_buffer,
151
NULL);
193
+ vhost_vdpa_net_cvq_cmd_len());
152
+
194
+ ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len);
153
+ s->vnet_hdr = false;
195
if (unlikely(!ok)) {
154
+ object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
196
goto out;
155
+ compare_set_vnet_hdr, NULL);
197
}
156
}
198
157
199
- r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
158
static void colo_compare_finalize(Object *obj)
200
+ r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem);
159
diff --git a/qemu-options.hx b/qemu-options.hx
201
if (unlikely(r != 0)) {
160
index XXXXXXX..XXXXXXX 100644
202
if (unlikely(r == -ENOSPC)) {
161
--- a/qemu-options.hx
203
qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
162
+++ b/qemu-options.hx
204
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
163
@@ -XXX,XX +XXX,XX @@ Dump the network traffic on netdev @var{dev} to the file specified by
205
goto out;
164
The file format is libpcap, so it can be analyzed with tools such as tcpdump
206
}
165
or Wireshark.
207
166
208
- memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
167
-@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},
209
+ memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status));
168
-outdev=@var{chardevid}
210
if (status != VIRTIO_NET_OK) {
169
+@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid}[,vnet_hdr_support]
211
goto out;
170
212
}
171
Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with
213
172
secondary packet. If the packets are same, we will output primary
214
status = VIRTIO_NET_ERR;
173
packet to outdev@var{chardevid}, else we will notify colo-frame
215
- virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
174
do checkpoint and send primary packet to outdev@var{chardevid}.
216
+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
175
+if it has the vnet_hdr_support flag, colo compare will send/recv packet with vnet_hdr_len.
217
if (status != VIRTIO_NET_OK) {
176
218
error_report("Bad CVQ processing in model");
177
we must use it with the help of filter-mirror and filter-redirector.
219
}
220
@@ -XXX,XX +XXX,XX @@ out:
221
}
222
vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
223
g_free(elem);
224
- if (dev_buffers[0].iov_base) {
225
- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
226
- }
227
- if (dev_buffers[1].iov_base) {
228
- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
229
- }
230
return r;
231
}
178
232
179
--
233
--
180
2.7.4
234
2.7.4
181
235
182
236
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
This patch changes the compare_chr_send() parameter from CharBackend to CompareState, so that
3
So we can reuse it to inject state messages.
4
we can get more information like vnet_hdr(We use it to support packet with vnet_header).
5
4
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
5
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
6
Acked-by: Jason Wang <jasowang@redhat.com>
7
--
8
v7:
9
* Remove double free error
10
11
v6:
12
* Do not assume in buffer sent to the device is sizeof(virtio_net_ctrl_ack)
13
14
v5:
15
* Do not use an artificial !NULL VirtQueueElement
16
* Use only out size instead of iovec dev_buffers for these functions.
17
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
19
---
9
net/colo-compare.c | 14 +++++++-------
20
net/vhost-vdpa.c | 59 ++++++++++++++++++++++++++++++++++++--------------------
10
1 file changed, 7 insertions(+), 7 deletions(-)
21
1 file changed, 38 insertions(+), 21 deletions(-)
11
22
12
diff --git a/net/colo-compare.c b/net/colo-compare.c
23
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
13
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
14
--- a/net/colo-compare.c
25
--- a/net/vhost-vdpa.c
15
+++ b/net/colo-compare.c
26
+++ b/net/vhost-vdpa.c
16
@@ -XXX,XX +XXX,XX @@ enum {
27
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
17
SECONDARY_IN,
18
};
19
20
-static int compare_chr_send(CharBackend *out,
21
+static int compare_chr_send(CompareState *s,
22
const uint8_t *buf,
23
uint32_t size);
24
25
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
26
}
27
28
if (result) {
29
- ret = compare_chr_send(&s->chr_out, pkt->data, pkt->size);
30
+ ret = compare_chr_send(s, pkt->data, pkt->size);
31
if (ret < 0) {
32
error_report("colo_send_primary_packet failed");
33
}
34
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
35
}
28
}
36
}
29
}
37
30
38
-static int compare_chr_send(CharBackend *out,
31
+static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
39
+static int compare_chr_send(CompareState *s,
32
+ size_t in_len)
40
const uint8_t *buf,
33
+{
41
uint32_t size)
34
+ /* Buffers for the device */
35
+ const struct iovec out = {
36
+ .iov_base = s->cvq_cmd_out_buffer,
37
+ .iov_len = out_len,
38
+ };
39
+ const struct iovec in = {
40
+ .iov_base = s->cvq_cmd_in_buffer,
41
+ .iov_len = sizeof(virtio_net_ctrl_ack),
42
+ };
43
+ VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
44
+ int r;
45
+
46
+ r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
47
+ if (unlikely(r != 0)) {
48
+ if (unlikely(r == -ENOSPC)) {
49
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
50
+ __func__);
51
+ }
52
+ return r;
53
+ }
54
+
55
+ /*
56
+ * We can poll here since we've had BQL from the time we sent the
57
+ * descriptor. Also, we need to take the answer before SVQ pulls by itself,
58
+ * when BQL is released
59
+ */
60
+ return vhost_svq_poll(svq);
61
+}
62
+
63
static NetClientInfo net_vhost_vdpa_cvq_info = {
64
.type = NET_CLIENT_DRIVER_VHOST_VDPA,
65
.size = sizeof(VhostVDPAState),
66
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
67
void *opaque)
42
{
68
{
43
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CharBackend *out,
69
VhostVDPAState *s = opaque;
44
return 0;
70
- size_t in_len, dev_written;
71
+ size_t in_len;
72
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
73
/* Out buffer sent to both the vdpa device and the device model */
74
struct iovec out = {
75
.iov_base = s->cvq_cmd_out_buffer,
76
};
77
- /* In buffer sent to the device */
78
- const struct iovec dev_in = {
79
- .iov_base = s->cvq_cmd_in_buffer,
80
- .iov_len = sizeof(virtio_net_ctrl_ack),
81
- };
82
/* in buffer used for device model */
83
const struct iovec in = {
84
.iov_base = &status,
85
.iov_len = sizeof(status),
86
};
87
- int r = -EINVAL;
88
+ ssize_t dev_written = -EINVAL;
89
bool ok;
90
91
out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
92
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
93
goto out;
45
}
94
}
46
95
47
- ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len));
96
- r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem);
48
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
97
- if (unlikely(r != 0)) {
49
if (ret != sizeof(len)) {
98
- if (unlikely(r == -ENOSPC)) {
50
goto err;
99
- qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
100
- __func__);
101
- }
102
+ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
103
+ if (unlikely(dev_written < 0)) {
104
goto out;
51
}
105
}
52
106
53
- ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size);
107
- /*
54
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
108
- * We can poll here since we've had BQL from the time we sent the
55
if (ret != size) {
109
- * descriptor. Also, we need to take the answer before SVQ pulls by itself,
56
goto err;
110
- * when BQL is released
111
- */
112
- dev_written = vhost_svq_poll(svq);
113
if (unlikely(dev_written < sizeof(status))) {
114
error_report("Insufficient written data (%zu)", dev_written);
115
goto out;
116
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
117
118
memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status));
119
if (status != VIRTIO_NET_OK) {
120
- goto out;
121
+ return VIRTIO_NET_ERR;
57
}
122
}
58
@@ -XXX,XX +XXX,XX @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs)
123
59
124
status = VIRTIO_NET_ERR;
60
if (packet_enqueue(s, PRIMARY_IN)) {
125
@@ -XXX,XX +XXX,XX @@ out:
61
trace_colo_compare_main("primary: unsupported packet in");
62
- compare_chr_send(&s->chr_out, pri_rs->buf, pri_rs->packet_len);
63
+ compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
64
} else {
65
/* compare connection */
66
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
67
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
68
69
while (!g_queue_is_empty(&conn->primary_list)) {
70
pkt = g_queue_pop_head(&conn->primary_list);
71
- compare_chr_send(&s->chr_out, pkt->data, pkt->size);
72
+ compare_chr_send(s, pkt->data, pkt->size);
73
packet_destroy(pkt, NULL);
74
}
126
}
75
while (!g_queue_is_empty(&conn->secondary_list)) {
127
vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
128
g_free(elem);
129
- return r;
130
+ return dev_written < 0 ? dev_written : 0;
131
}
132
133
static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
76
--
134
--
77
2.7.4
135
2.7.4
78
136
79
137
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Add vnet_hdr_len arguments in NetClientState
3
It allows per-net client operations right after device's successful
4
so that other modules can get the real vnet_hdr_len easily.
4
start. In particular, to load the device status.
5
5
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Vhost-vdpa net will use it to add the CVQ buffers to restore the device
7
status.
8
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Acked-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
12
---
9
include/net/net.h | 1 +
13
hw/net/vhost_net.c | 7 +++++++
10
net/net.c | 1 +
14
include/net/net.h | 2 ++
11
2 files changed, 2 insertions(+)
15
2 files changed, 9 insertions(+)
12
16
17
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/vhost_net.c
20
+++ b/hw/net/vhost_net.c
21
@@ -XXX,XX +XXX,XX @@ static int vhost_net_start_one(struct vhost_net *net,
22
}
23
}
24
}
25
+
26
+ if (net->nc->info->load) {
27
+ r = net->nc->info->load(net->nc);
28
+ if (r < 0) {
29
+ goto fail;
30
+ }
31
+ }
32
return 0;
33
fail:
34
file.fd = -1;
13
diff --git a/include/net/net.h b/include/net/net.h
35
diff --git a/include/net/net.h b/include/net/net.h
14
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
15
--- a/include/net/net.h
37
--- a/include/net/net.h
16
+++ b/include/net/net.h
38
+++ b/include/net/net.h
17
@@ -XXX,XX +XXX,XX @@ struct NetClientState {
39
@@ -XXX,XX +XXX,XX @@ typedef struct NICConf {
18
unsigned int queue_index;
40
typedef void (NetPoll)(NetClientState *, bool enable);
19
unsigned rxfilter_notify_enabled:1;
41
typedef bool (NetCanReceive)(NetClientState *);
20
int vring_enable;
42
typedef int (NetStart)(NetClientState *);
21
+ int vnet_hdr_len;
43
+typedef int (NetLoad)(NetClientState *);
22
QTAILQ_HEAD(NetFilterHead, NetFilterState) filters;
44
typedef void (NetStop)(NetClientState *);
23
};
45
typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t);
24
46
typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int);
25
diff --git a/net/net.c b/net/net.c
47
@@ -XXX,XX +XXX,XX @@ typedef struct NetClientInfo {
26
index XXXXXXX..XXXXXXX 100644
48
NetReceiveIOV *receive_iov;
27
--- a/net/net.c
49
NetCanReceive *can_receive;
28
+++ b/net/net.c
50
NetStart *start;
29
@@ -XXX,XX +XXX,XX @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
51
+ NetLoad *load;
30
return;
52
NetStop *stop;
31
}
53
NetCleanup *cleanup;
32
54
LinkStatusChanged *link_status_changed;
33
+ nc->vnet_hdr_len = len;
34
nc->info->set_vnet_hdr_len(nc, len);
35
}
36
37
--
55
--
38
2.7.4
56
2.7.4
39
57
40
58
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We add the vnet_hdr_support option for filter-rewriter, default is disabled.
3
This is needed so the destination vdpa device sees the same state as the
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
guest set in the source.
5
You can use it for example:
6
-object filter-rewriter,id=rew0,netdev=hn0,queue=all,vnet_hdr_support
7
5
8
We get the vnet_hdr_len from NetClientState, which lets us
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
parse net packet correctly.
7
Acked-by: Jason Wang <jasowang@redhat.com>
10
11
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
9
---
14
net/filter-rewriter.c | 37 ++++++++++++++++++++++++++++++++++++-
10
net/vhost-vdpa.c | 40 ++++++++++++++++++++++++++++++++++++++++
15
qemu-options.hx | 4 ++--
11
1 file changed, 40 insertions(+)
16
2 files changed, 38 insertions(+), 3 deletions(-)
17
12
18
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
13
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
19
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
20
--- a/net/filter-rewriter.c
15
--- a/net/vhost-vdpa.c
21
+++ b/net/filter-rewriter.c
16
+++ b/net/vhost-vdpa.c
22
@@ -XXX,XX +XXX,XX @@
17
@@ -XXX,XX +XXX,XX @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
23
#include "qemu-common.h"
18
return vhost_svq_poll(svq);
24
#include "qapi/error.h"
19
}
25
#include "qapi/qmp/qerror.h"
20
26
+#include "qemu/error-report.h"
21
+static int vhost_vdpa_net_load(NetClientState *nc)
27
#include "qapi-visit.h"
22
+{
28
#include "qom/object.h"
23
+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
29
#include "qemu/main-loop.h"
24
+ const struct vhost_vdpa *v = &s->vhost_vdpa;
30
@@ -XXX,XX +XXX,XX @@ typedef struct RewriterState {
25
+ const VirtIONet *n;
31
NetQueue *incoming_queue;
26
+ uint64_t features;
32
/* hashtable to save connection */
33
GHashTable *connection_track_table;
34
+ bool vnet_hdr;
35
} RewriterState;
36
37
static void filter_rewriter_flush(NetFilterState *nf)
38
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
39
ConnectionKey key;
40
Packet *pkt;
41
ssize_t size = iov_size(iov, iovcnt);
42
+ ssize_t vnet_hdr_len = 0;
43
char *buf = g_malloc0(size);
44
45
iov_to_buf(iov, iovcnt, 0, buf, size);
46
- pkt = packet_new(buf, size, 0);
47
+
27
+
48
+ if (s->vnet_hdr) {
28
+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
49
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
29
+
30
+ if (!v->shadow_vqs_enabled) {
31
+ return 0;
50
+ }
32
+ }
51
+
33
+
52
+ pkt = packet_new(buf, size, vnet_hdr_len);
34
+ n = VIRTIO_NET(v->dev->vdev);
53
g_free(buf);
35
+ features = n->parent_obj.guest_features;
54
36
+ if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
55
/*
37
+ const struct virtio_net_ctrl_hdr ctrl = {
56
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
38
+ .class = VIRTIO_NET_CTRL_MAC,
57
s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
39
+ .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET,
58
}
40
+ };
59
41
+ char *cursor = s->cvq_cmd_out_buffer;
60
+static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp)
42
+ ssize_t dev_written;
61
+{
62
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
63
+
43
+
64
+ return s->vnet_hdr;
44
+ memcpy(cursor, &ctrl, sizeof(ctrl));
45
+ cursor += sizeof(ctrl);
46
+ memcpy(cursor, n->mac, sizeof(n->mac));
47
+
48
+ dev_written = vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + sizeof(n->mac),
49
+ sizeof(virtio_net_ctrl_ack));
50
+ if (unlikely(dev_written < 0)) {
51
+ return dev_written;
52
+ }
53
+
54
+ return *((virtio_net_ctrl_ack *)s->cvq_cmd_in_buffer) != VIRTIO_NET_OK;
55
+ }
56
+
57
+ return 0;
65
+}
58
+}
66
+
59
+
67
+static void filter_rewriter_set_vnet_hdr(Object *obj,
60
static NetClientInfo net_vhost_vdpa_cvq_info = {
68
+ bool value,
61
.type = NET_CLIENT_DRIVER_VHOST_VDPA,
69
+ Error **errp)
62
.size = sizeof(VhostVDPAState),
70
+{
63
.receive = vhost_vdpa_receive,
71
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
64
.start = vhost_vdpa_net_cvq_start,
72
+
65
+ .load = vhost_vdpa_net_load,
73
+ s->vnet_hdr = value;
66
.stop = vhost_vdpa_net_cvq_stop,
74
+}
67
.cleanup = vhost_vdpa_cleanup,
75
+
68
.has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
76
+static void filter_rewriter_init(Object *obj)
77
+{
78
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
79
+
80
+ s->vnet_hdr = false;
81
+ object_property_add_bool(obj, "vnet_hdr_support",
82
+ filter_rewriter_get_vnet_hdr,
83
+ filter_rewriter_set_vnet_hdr, NULL);
84
+}
85
+
86
static void colo_rewriter_class_init(ObjectClass *oc, void *data)
87
{
88
NetFilterClass *nfc = NETFILTER_CLASS(oc);
89
@@ -XXX,XX +XXX,XX @@ static const TypeInfo colo_rewriter_info = {
90
.name = TYPE_FILTER_REWRITER,
91
.parent = TYPE_NETFILTER,
92
.class_init = colo_rewriter_class_init,
93
+ .instance_init = filter_rewriter_init,
94
.instance_size = sizeof(RewriterState),
95
};
96
97
diff --git a/qemu-options.hx b/qemu-options.hx
98
index XXXXXXX..XXXXXXX 100644
99
--- a/qemu-options.hx
100
+++ b/qemu-options.hx
101
@@ -XXX,XX +XXX,XX @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
102
be the same. we can just use indev or outdev, but at least one of indev or outdev
103
need to be specified.
104
105
-@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid}[,queue=@var{all|rx|tx}]
106
+@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support]
107
108
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
109
secondary from primary to keep secondary tcp connection,and rewrite
110
tcp packet to primary from secondary make tcp packet can be handled by
111
-client.
112
+client.if it has the vnet_hdr_support flag, we can parse packet with vnet header.
113
114
usage:
115
colo secondary:
116
--
69
--
117
2.7.4
70
2.7.4
118
71
119
72
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We add the vnet_hdr_support option for filter-mirror, default is disabled.
3
We can restore the device state in the destination via CVQ now. Remove
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
the migration blocker.
5
You can use it for example:
6
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
7
5
8
If it has vnet_hdr_support flag, we will change the sending packet format from
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
struct {int size; const uint8_t buf[];} to {int size; int vnet_hdr_len; const uint8_t buf[];}.
7
Acked-by: Jason Wang <jasowang@redhat.com>
10
This lets other modules (like colo-compare) know how to parse the net packet correctly.
11
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
9
---
15
net/filter-mirror.c | 42 +++++++++++++++++++++++++++++++++++++++++-
10
hw/virtio/vhost-vdpa.c | 15 ---------------
16
qemu-options.hx | 5 ++---
11
include/hw/virtio/vhost-vdpa.h | 1 -
17
2 files changed, 43 insertions(+), 4 deletions(-)
12
net/vhost-vdpa.c | 2 --
13
3 files changed, 18 deletions(-)
18
14
19
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
15
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
20
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
21
--- a/net/filter-mirror.c
17
--- a/hw/virtio/vhost-vdpa.c
22
+++ b/net/filter-mirror.c
18
+++ b/hw/virtio/vhost-vdpa.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
19
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
24
CharBackend chr_in;
20
return true;
25
CharBackend chr_out;
26
SocketReadState rs;
27
+ bool vnet_hdr;
28
} MirrorState;
29
30
static int filter_send(MirrorState *s,
31
const struct iovec *iov,
32
int iovcnt)
33
{
34
+ NetFilterState *nf = NETFILTER(s);
35
int ret = 0;
36
ssize_t size = 0;
37
uint32_t len = 0;
38
@@ -XXX,XX +XXX,XX @@ static int filter_send(MirrorState *s,
39
goto err;
40
}
21
}
41
22
42
+ if (s->vnet_hdr) {
23
- if (v->migration_blocker) {
43
+ /*
24
- int r = migrate_add_blocker(v->migration_blocker, &err);
44
+ * If vnet_hdr = on, we send vnet header len to make other
25
- if (unlikely(r < 0)) {
45
+ * module(like colo-compare) know how to parse net
26
- return false;
46
+ * packet correctly.
27
- }
47
+ */
28
- }
48
+ ssize_t vnet_hdr_len;
29
-
49
+
30
for (i = 0; i < v->shadow_vqs->len; ++i) {
50
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
31
VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
51
+
32
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
52
+ len = htonl(vnet_hdr_len);
33
@@ -XXX,XX +XXX,XX @@ err:
53
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
34
vhost_svq_stop(svq);
54
+ if (ret != sizeof(len)) {
55
+ goto err;
56
+ }
57
+ }
58
+
59
buf = g_malloc(size);
60
iov_to_buf(iov, iovcnt, 0, buf, size);
61
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
62
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
63
}
64
}
35
}
65
36
66
- net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
37
- if (v->migration_blocker) {
67
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr);
38
- migrate_del_blocker(v->migration_blocker);
68
39
- }
69
if (s->indev) {
40
-
70
chr = qemu_chr_find(s->indev);
41
return false;
71
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_set_outdev(Object *obj,
42
}
43
44
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
45
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
46
vhost_vdpa_svq_unmap_rings(dev, svq);
72
}
47
}
48
-
49
- if (v->migration_blocker) {
50
- migrate_del_blocker(v->migration_blocker);
51
- }
73
}
52
}
74
53
75
+static bool filter_mirror_get_vnet_hdr(Object *obj, Error **errp)
54
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
76
+{
55
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
77
+ MirrorState *s = FILTER_MIRROR(obj);
78
+
79
+ return s->vnet_hdr;
80
+}
81
+
82
+static void filter_mirror_set_vnet_hdr(Object *obj, bool value, Error **errp)
83
+{
84
+ MirrorState *s = FILTER_MIRROR(obj);
85
+
86
+ s->vnet_hdr = value;
87
+}
88
+
89
static char *filter_redirector_get_outdev(Object *obj, Error **errp)
90
{
91
MirrorState *s = FILTER_REDIRECTOR(obj);
92
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
93
94
static void filter_mirror_init(Object *obj)
95
{
96
+ MirrorState *s = FILTER_MIRROR(obj);
97
+
98
object_property_add_str(obj, "outdev", filter_mirror_get_outdev,
99
filter_mirror_set_outdev, NULL);
100
+
101
+ s->vnet_hdr = false;
102
+ object_property_add_bool(obj, "vnet_hdr_support",
103
+ filter_mirror_get_vnet_hdr,
104
+ filter_mirror_set_vnet_hdr, NULL);
105
}
106
107
static void filter_redirector_init(Object *obj)
108
diff --git a/qemu-options.hx b/qemu-options.hx
109
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
110
--- a/qemu-options.hx
57
--- a/include/hw/virtio/vhost-vdpa.h
111
+++ b/qemu-options.hx
58
+++ b/include/hw/virtio/vhost-vdpa.h
112
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
59
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
113
@option{tx}: the filter is attached to the transmit queue of the netdev,
60
bool shadow_vqs_enabled;
114
where it will receive packets sent by the netdev.
61
/* IOVA mapping used by the Shadow Virtqueue */
115
62
VhostIOVATree *iova_tree;
116
-@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
63
- Error *migration_blocker;
117
+@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
64
GPtrArray *shadow_vqs;
118
65
const VhostShadowVirtqueueOps *shadow_vq_ops;
119
-filter-mirror on netdev @var{netdevid},mirror net packet to chardev
66
void *shadow_vq_ops_opaque;
120
-@var{chardevid}
67
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
121
+filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
68
index XXXXXXX..XXXXXXX 100644
122
69
--- a/net/vhost-vdpa.c
123
@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
70
+++ b/net/vhost-vdpa.c
124
outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
71
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
72
73
s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
74
s->vhost_vdpa.shadow_vq_ops_opaque = s;
75
- error_setg(&s->vhost_vdpa.migration_blocker,
76
- "Migration disabled: vhost-vdpa uses CVQ.");
77
}
78
ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
79
if (ret) {
125
--
80
--
126
2.7.4
81
2.7.4
127
82
128
83
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Zhang Chen <chen.zhang@intel.com>
2
2
3
Make colo-compare and filter-rewriter able to parse vnet packets.
3
When virtio-net-pci is enabled, guest network packets will
4
load the vnet_hdr. In COLO status, the primary VM's network
5
packet may be redirected to another VM; it needs filter-redirect
6
enable the vnet_hdr flag at the same time, COLO-proxy will
7
correctly parse the original network packet. If there is any
8
misconfiguration here, the vnet_hdr_len is wrong for parsing
9
the packet, the data+offset will point to wrong place.
4
10
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
11
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
13
---
8
net/colo.c | 6 +++---
14
net/colo.c | 25 ++++++++++++++++---------
9
1 file changed, 3 insertions(+), 3 deletions(-)
15
net/colo.h | 1 +
16
net/trace-events | 2 +-
17
3 files changed, 18 insertions(+), 10 deletions(-)
10
18
11
diff --git a/net/colo.c b/net/colo.c
19
diff --git a/net/colo.c b/net/colo.c
12
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo.c
21
--- a/net/colo.c
14
+++ b/net/colo.c
22
+++ b/net/colo.c
15
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
23
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
16
{
24
{
17
int network_length;
25
int network_length;
18
static const uint8_t vlan[] = {0x81, 0x00};
26
static const uint8_t vlan[] = {0x81, 0x00};
19
- uint8_t *data = pkt->data;
27
- uint8_t *data = pkt->data + pkt->vnet_hdr_len;
20
+ uint8_t *data = pkt->data + pkt->vnet_hdr_len;
28
+ uint8_t *data = pkt->data;
21
uint16_t l3_proto;
29
uint16_t l3_proto;
22
ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
30
ssize_t l2hdr_len;
23
31
24
- if (pkt->size < ETH_HLEN) {
32
- if (data == NULL) {
25
+ if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
33
- trace_colo_proxy_main_vnet_info("This packet is not parsed correctly, "
26
trace_colo_proxy_main("pkt->size < ETH_HLEN");
34
- "pkt->vnet_hdr_len", pkt->vnet_hdr_len);
35
+ assert(data);
36
+
37
+ /* Check the received vnet_hdr_len then add the offset */
38
+ if ((pkt->vnet_hdr_len > sizeof(struct virtio_net_hdr_v1_hash)) ||
39
+ (pkt->size < sizeof(struct eth_header) + sizeof(struct vlan_header) +
40
+ pkt->vnet_hdr_len)) {
41
+ /*
42
+ * The received remote packet maybe misconfiguration here,
43
+ * Please enable/disable filter module's the vnet_hdr flag at
44
+ * the same time.
45
+ */
46
+ trace_colo_proxy_main_vnet_info("This received packet load wrong ",
47
+ pkt->vnet_hdr_len, pkt->size);
27
return 1;
48
return 1;
28
}
49
}
29
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
50
- l2hdr_len = eth_get_l2_hdr_length(data);
30
}
51
+ data += pkt->vnet_hdr_len;
31
52
32
network_length = pkt->ip->ip_hl * 4;
53
- if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
33
- if (pkt->size < l2hdr_len + network_length) {
54
- trace_colo_proxy_main("pkt->size < ETH_HLEN");
34
+ if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) {
55
- return 1;
35
trace_colo_proxy_main("pkt->size < network_header + network_length");
56
- }
36
return 1;
57
+ l2hdr_len = eth_get_l2_hdr_length(data);
37
}
58
59
/*
60
* TODO: support vlan.
61
diff --git a/net/colo.h b/net/colo.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/net/colo.h
64
+++ b/net/colo.h
65
@@ -XXX,XX +XXX,XX @@
66
#include "qemu/jhash.h"
67
#include "qemu/timer.h"
68
#include "net/eth.h"
69
+#include "standard-headers/linux/virtio_net.h"
70
71
#define HASHTABLE_MAX_SIZE 16384
72
73
diff --git a/net/trace-events b/net/trace-events
74
index XXXXXXX..XXXXXXX 100644
75
--- a/net/trace-events
76
+++ b/net/trace-events
77
@@ -XXX,XX +XXX,XX @@ vhost_user_event(const char *chr, int event) "chr: %s got event: %d"
78
79
# colo.c
80
colo_proxy_main(const char *chr) ": %s"
81
-colo_proxy_main_vnet_info(const char *sta, int size) ": %s = %d"
82
+colo_proxy_main_vnet_info(const char *sta, uint32_t vnet_hdr, int size) ": %s pkt->vnet_hdr_len = %u, pkt->size = %d"
83
84
# colo-compare.c
85
colo_compare_main(const char *chr) ": %s"
38
--
86
--
39
2.7.4
87
2.7.4
40
41
diff view generated by jsdifflib
New patch
1
From: Zheyu Ma <zheyuma97@gmail.com>
1
2
3
The DMA engine is started by I/O access and then itself accesses the
4
I/O registers, triggering a reentrancy bug.
5
6
The following log can reveal it:
7
==5637==ERROR: AddressSanitizer: stack-overflow
8
#0 0x5595435f6078 in tulip_xmit_list_update qemu/hw/net/tulip.c:673
9
#1 0x5595435f204a in tulip_write qemu/hw/net/tulip.c:805:13
10
#2 0x559544637f86 in memory_region_write_accessor qemu/softmmu/memory.c:492:5
11
#3 0x5595446379fa in access_with_adjusted_size qemu/softmmu/memory.c:554:18
12
#4 0x5595446372fa in memory_region_dispatch_write qemu/softmmu/memory.c
13
#5 0x55954468b74c in flatview_write_continue qemu/softmmu/physmem.c:2825:23
14
#6 0x559544683662 in flatview_write qemu/softmmu/physmem.c:2867:12
15
#7 0x5595446833f3 in address_space_write qemu/softmmu/physmem.c:2963:18
16
#8 0x5595435fb082 in dma_memory_rw_relaxed qemu/include/sysemu/dma.h:87:12
17
#9 0x5595435fb082 in dma_memory_rw qemu/include/sysemu/dma.h:130:12
18
#10 0x5595435fb082 in dma_memory_write qemu/include/sysemu/dma.h:171:12
19
#11 0x5595435fb082 in stl_le_dma qemu/include/sysemu/dma.h:272:1
20
#12 0x5595435fb082 in stl_le_pci_dma qemu/include/hw/pci/pci.h:910:1
21
#13 0x5595435fb082 in tulip_desc_write qemu/hw/net/tulip.c:101:9
22
#14 0x5595435f7e3d in tulip_xmit_list_update qemu/hw/net/tulip.c:706:9
23
#15 0x5595435f204a in tulip_write qemu/hw/net/tulip.c:805:13
24
25
Fix this bug by restricting the DMA engine to memory regions.
26
27
Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
28
Signed-off-by: Jason Wang <jasowang@redhat.com>
29
---
30
hw/net/tulip.c | 4 ++--
31
1 file changed, 2 insertions(+), 2 deletions(-)
32
33
diff --git a/hw/net/tulip.c b/hw/net/tulip.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/hw/net/tulip.c
36
+++ b/hw/net/tulip.c
37
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_pci_tulip = {
38
static void tulip_desc_read(TULIPState *s, hwaddr p,
39
struct tulip_descriptor *desc)
40
{
41
- const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
42
+ const MemTxAttrs attrs = { .memory = true };
43
44
if (s->csr[0] & CSR0_DBO) {
45
ldl_be_pci_dma(&s->dev, p, &desc->status, attrs);
46
@@ -XXX,XX +XXX,XX @@ static void tulip_desc_read(TULIPState *s, hwaddr p,
47
static void tulip_desc_write(TULIPState *s, hwaddr p,
48
struct tulip_descriptor *desc)
49
{
50
- const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
51
+ const MemTxAttrs attrs = { .memory = true };
52
53
if (s->csr[0] & CSR0_DBO) {
54
stl_be_pci_dma(&s->dev, p, desc->status, attrs);
55
--
56
2.7.4
diff view generated by jsdifflib