1
The following changes since commit e0175b71638cf4398903c0d25f93fe62e0606389:
1
The following changes since commit 92f8c6fef13b31ba222c4d20ad8afd2b79c4c28e:
2
2
3
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200228' into staging (2020-02-28 16:39:27 +0000)
3
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20210525' into staging (2021-05-25 16:17:06 +0100)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to 41aa2e3f9b27fd259a13711545d933a20f1d2f16:
9
for you to fetch changes up to 90322e646e87c1440661cb3ddbc0cc94309d8a4f:
10
10
11
l2tpv3: fix RFC number typo in qemu-options.hx (2020-03-02 15:30:08 +0800)
11
MAINTAINERS: Added eBPF maintainers information. (2021-06-04 15:25:46 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
----------------------------------------------------------------
15
----------------------------------------------------------------
16
Bin Meng (1):
16
Andrew Melnychenko (7):
17
hw: net: cadence_gem: Fix build errors in DB_PRINT()
17
net/tap: Added TUNSETSTEERINGEBPF code.
18
net: Added SetSteeringEBPF method for NetClientState.
19
ebpf: Added eBPF RSS program.
20
ebpf: Added eBPF RSS loader.
21
virtio-net: Added eBPF RSS to virtio-net.
22
docs: Added eBPF documentation.
23
MAINTAINERS: Added eBPF maintainers information.
18
24
19
Finn Thain (14):
25
MAINTAINERS | 8 +
20
dp8393x: Mask EOL bit from descriptor addresses
26
configure | 8 +-
21
dp8393x: Always use 32-bit accesses
27
docs/devel/ebpf_rss.rst | 125 +++++++++
22
dp8393x: Clean up endianness hacks
28
docs/devel/index.rst | 1 +
23
dp8393x: Have dp8393x_receive() return the packet size
29
ebpf/ebpf_rss-stub.c | 40 +++
24
dp8393x: Update LLFA and CRDA registers from rx descriptor
30
ebpf/ebpf_rss.c | 165 ++++++++++++
25
dp8393x: Clear RRRA command register bit only when appropriate
31
ebpf/ebpf_rss.h | 44 ++++
26
dp8393x: Implement packet size limit and RBAE interrupt
32
ebpf/meson.build | 1 +
27
dp8393x: Don't clobber packet checksum
33
ebpf/rss.bpf.skeleton.h | 431 +++++++++++++++++++++++++++++++
28
dp8393x: Use long-word-aligned RRA pointers in 32-bit mode
34
ebpf/trace-events | 4 +
29
dp8393x: Pad frames to word or long word boundary
35
ebpf/trace.h | 1 +
30
dp8393x: Clear descriptor in_use field to release packet
36
hw/net/vhost_net.c | 3 +
31
dp8393x: Always update RRA pointers and sequence numbers
37
hw/net/virtio-net.c | 116 ++++++++-
32
dp8393x: Don't reset Silicon Revision register
38
include/hw/virtio/virtio-net.h | 4 +
33
dp8393x: Don't stop reception upon RBE interrupt assertion
39
include/net/net.h | 2 +
34
40
meson.build | 23 ++
35
Lukas Straub (4):
41
meson_options.txt | 2 +
36
block/replication.c: Ignore requests after failover
42
net/tap-bsd.c | 5 +
37
tests/test-replication.c: Add test for secondary node continuing replication
43
net/tap-linux.c | 13 +
38
net/filter.c: Add Options to insert filters anywhere in the filter list
44
net/tap-linux.h | 1 +
39
colo: Update Documentation for continuous replication
45
net/tap-solaris.c | 5 +
40
46
net/tap-stub.c | 5 +
41
Stefan Hajnoczi (1):
47
net/tap.c | 9 +
42
l2tpv3: fix RFC number typo in qemu-options.hx
48
net/tap_int.h | 1 +
43
49
net/vhost-vdpa.c | 2 +
44
Yuri Benditovich (3):
50
tools/ebpf/Makefile.ebpf | 21 ++
45
e1000e: Avoid hw_error if legacy mode used
51
tools/ebpf/rss.bpf.c | 571 +++++++++++++++++++++++++++++++++++++++++
46
NetRxPkt: Introduce support for additional hash types
52
27 files changed, 1607 insertions(+), 4 deletions(-)
47
NetRxPkt: fix hash calculation of IPV6 TCP
53
create mode 100644 docs/devel/ebpf_rss.rst
48
54
create mode 100644 ebpf/ebpf_rss-stub.c
49
block/replication.c | 35 ++++++-
55
create mode 100644 ebpf/ebpf_rss.c
50
docs/COLO-FT.txt | 224 +++++++++++++++++++++++++++++++++------------
56
create mode 100644 ebpf/ebpf_rss.h
51
docs/block-replication.txt | 28 ++++--
57
create mode 100644 ebpf/meson.build
52
hw/net/cadence_gem.c | 11 ++-
58
create mode 100644 ebpf/rss.bpf.skeleton.h
53
hw/net/dp8393x.c | 200 ++++++++++++++++++++++++++--------------
59
create mode 100644 ebpf/trace-events
54
hw/net/e1000e_core.c | 15 +--
60
create mode 100644 ebpf/trace.h
55
hw/net/net_rx_pkt.c | 44 ++++++++-
61
create mode 100755 tools/ebpf/Makefile.ebpf
56
hw/net/net_rx_pkt.h | 6 +-
62
create mode 100644 tools/ebpf/rss.bpf.c
57
hw/net/trace-events | 4 +
58
include/net/filter.h | 2 +
59
net/filter.c | 92 ++++++++++++++++++-
60
qemu-options.hx | 35 +++++--
61
tests/test-replication.c | 52 +++++++++++
62
13 files changed, 591 insertions(+), 157 deletions(-)
63
63
64
64
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
The least significant bit of a descriptor address register is used as
4
an EOL flag. It has to be masked when the register value is to be used
5
as an actual address for copying memory around. But when the registers
6
are to be updated the EOL bit should not be masked.
7
8
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
9
Tested-by: Laurent Vivier <laurent@vivier.eu>
10
---
11
hw/net/dp8393x.c | 17 +++++++++++------
12
1 file changed, 11 insertions(+), 6 deletions(-)
13
14
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/dp8393x.c
17
+++ b/hw/net/dp8393x.c
18
@@ -XXX,XX +XXX,XX @@ do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
19
#define SONIC_ISR_PINT 0x0800
20
#define SONIC_ISR_LCD 0x1000
21
22
+#define SONIC_DESC_EOL 0x0001
23
+#define SONIC_DESC_ADDR 0xFFFE
24
+
25
#define TYPE_DP8393X "dp8393x"
26
#define DP8393X(obj) OBJECT_CHECK(dp8393xState, (obj), TYPE_DP8393X)
27
28
@@ -XXX,XX +XXX,XX @@ static uint32_t dp8393x_crba(dp8393xState *s)
29
30
static uint32_t dp8393x_crda(dp8393xState *s)
31
{
32
- return (s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA];
33
+ return (s->regs[SONIC_URDA] << 16) |
34
+ (s->regs[SONIC_CRDA] & SONIC_DESC_ADDR);
35
}
36
37
static uint32_t dp8393x_rbwc(dp8393xState *s)
38
@@ -XXX,XX +XXX,XX @@ static uint32_t dp8393x_tsa(dp8393xState *s)
39
40
static uint32_t dp8393x_ttda(dp8393xState *s)
41
{
42
- return (s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA];
43
+ return (s->regs[SONIC_UTDA] << 16) |
44
+ (s->regs[SONIC_TTDA] & SONIC_DESC_ADDR);
45
}
46
47
static uint32_t dp8393x_wt(dp8393xState *s)
48
@@ -XXX,XX +XXX,XX @@ static void dp8393x_do_transmit_packets(dp8393xState *s)
49
MEMTXATTRS_UNSPECIFIED, s->data,
50
size);
51
s->regs[SONIC_CTDA] = dp8393x_get(s, width, 0) & ~0x1;
52
- if (dp8393x_get(s, width, 0) & 0x1) {
53
+ if (dp8393x_get(s, width, 0) & SONIC_DESC_EOL) {
54
/* EOL detected */
55
break;
56
}
57
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
58
/* XXX: Check byte ordering */
59
60
/* Check for EOL */
61
- if (s->regs[SONIC_LLFA] & 0x1) {
62
+ if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
63
/* Are we still in resource exhaustion? */
64
size = sizeof(uint16_t) * 1 * width;
65
address = dp8393x_crda(s) + sizeof(uint16_t) * 5 * width;
66
address_space_read(&s->as, address, MEMTXATTRS_UNSPECIFIED,
67
s->data, size);
68
- if (dp8393x_get(s, width, 0) & 0x1) {
69
+ if (dp8393x_get(s, width, 0) & SONIC_DESC_EOL) {
70
/* Still EOL ; stop reception */
71
return -1;
72
} else {
73
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
74
dp8393x_crda(s) + sizeof(uint16_t) * 5 * width,
75
MEMTXATTRS_UNSPECIFIED, s->data, size);
76
s->regs[SONIC_LLFA] = dp8393x_get(s, width, 0);
77
- if (s->regs[SONIC_LLFA] & 0x1) {
78
+ if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
79
/* EOL detected */
80
s->regs[SONIC_ISR] |= SONIC_ISR_RDE;
81
} else {
82
--
83
2.5.0
84
85
diff view generated by jsdifflib
1
From: Finn Thain <fthain@telegraphics.com.au>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
The DP83932 and DP83934 have 32 data lines. The datasheet says,
3
Additional code that will be used for eBPF setting steering routine.
4
4
5
Data Bus: These bidirectional lines are used to transfer data on the
5
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
6
system bus. When the SONIC is a bus master, 16-bit data is transferred
7
on D15-D0 and 32-bit data is transferred on D31-D0. When the SONIC is
8
accessed as a slave, register data is driven onto lines D15-D0.
9
D31-D16 are held TRI-STATE if SONIC is in 16-bit mode. If SONIC is in
10
32-bit mode, they are driven, but invalid.
11
12
Always use 32-bit accesses both as bus master and bus slave.
13
14
Force the MSW to zero in bus master mode.
15
16
This gets the Linux 'jazzsonic' driver working, and avoids the need for
17
prior hacks to make the NetBSD 'sn' driver work.
18
19
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
20
Tested-by: Laurent Vivier <laurent@vivier.eu>
21
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
22
---
7
---
23
hw/net/dp8393x.c | 47 +++++++++++++++++++++++++++++------------------
8
net/tap-linux.h | 1 +
24
1 file changed, 29 insertions(+), 18 deletions(-)
9
1 file changed, 1 insertion(+)
25
10
26
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
11
diff --git a/net/tap-linux.h b/net/tap-linux.h
27
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/net/dp8393x.c
13
--- a/net/tap-linux.h
29
+++ b/hw/net/dp8393x.c
14
+++ b/net/tap-linux.h
30
@@ -XXX,XX +XXX,XX @@ static void dp8393x_put(dp8393xState *s, int width, int offset,
15
@@ -XXX,XX +XXX,XX @@
31
uint16_t val)
16
#define TUNSETQUEUE _IOW('T', 217, int)
32
{
17
#define TUNSETVNETLE _IOW('T', 220, int)
33
if (s->big_endian) {
18
#define TUNSETVNETBE _IOW('T', 222, int)
34
- s->data[offset * width + width - 1] = cpu_to_be16(val);
19
+#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
35
+ if (width == 2) {
20
36
+ s->data[offset * 2] = 0;
21
#endif
37
+ s->data[offset * 2 + 1] = cpu_to_be16(val);
38
+ } else {
39
+ s->data[offset] = cpu_to_be16(val);
40
+ }
41
} else {
42
- s->data[offset * width] = cpu_to_le16(val);
43
+ if (width == 2) {
44
+ s->data[offset * 2] = cpu_to_le16(val);
45
+ s->data[offset * 2 + 1] = 0;
46
+ } else {
47
+ s->data[offset] = cpu_to_le16(val);
48
+ }
49
}
50
}
51
52
@@ -XXX,XX +XXX,XX @@ static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size)
53
54
DPRINTF("read 0x%04x from reg %s\n", val, reg_names[reg]);
55
56
- return val;
57
+ return s->big_endian ? val << 16 : val;
58
}
59
60
static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
61
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
62
{
63
dp8393xState *s = opaque;
64
int reg = addr >> s->it_shift;
65
+ uint32_t val = s->big_endian ? data >> 16 : data;
66
67
- DPRINTF("write 0x%04x to reg %s\n", (uint16_t)data, reg_names[reg]);
68
+ DPRINTF("write 0x%04x to reg %s\n", (uint16_t)val, reg_names[reg]);
69
70
switch (reg) {
71
/* Command register */
72
case SONIC_CR:
73
- dp8393x_do_command(s, data);
74
+ dp8393x_do_command(s, val);
75
break;
76
/* Prevent write to read-only registers */
77
case SONIC_CAP2:
78
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
79
/* Accept write to some registers only when in reset mode */
80
case SONIC_DCR:
81
if (s->regs[SONIC_CR] & SONIC_CR_RST) {
82
- s->regs[reg] = data & 0xbfff;
83
+ s->regs[reg] = val & 0xbfff;
84
} else {
85
DPRINTF("writing to DCR invalid\n");
86
}
87
break;
88
case SONIC_DCR2:
89
if (s->regs[SONIC_CR] & SONIC_CR_RST) {
90
- s->regs[reg] = data & 0xf017;
91
+ s->regs[reg] = val & 0xf017;
92
} else {
93
DPRINTF("writing to DCR2 invalid\n");
94
}
95
break;
96
/* 12 lower bytes are Read Only */
97
case SONIC_TCR:
98
- s->regs[reg] = data & 0xf000;
99
+ s->regs[reg] = val & 0xf000;
100
break;
101
/* 9 lower bytes are Read Only */
102
case SONIC_RCR:
103
- s->regs[reg] = data & 0xffe0;
104
+ s->regs[reg] = val & 0xffe0;
105
break;
106
/* Ignore most significant bit */
107
case SONIC_IMR:
108
- s->regs[reg] = data & 0x7fff;
109
+ s->regs[reg] = val & 0x7fff;
110
dp8393x_update_irq(s);
111
break;
112
/* Clear bits by writing 1 to them */
113
case SONIC_ISR:
114
- data &= s->regs[reg];
115
- s->regs[reg] &= ~data;
116
- if (data & SONIC_ISR_RBE) {
117
+ val &= s->regs[reg];
118
+ s->regs[reg] &= ~val;
119
+ if (val & SONIC_ISR_RBE) {
120
dp8393x_do_read_rra(s);
121
}
122
dp8393x_update_irq(s);
123
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
124
case SONIC_REA:
125
case SONIC_RRP:
126
case SONIC_RWP:
127
- s->regs[reg] = data & 0xfffe;
128
+ s->regs[reg] = val & 0xfffe;
129
break;
130
/* Invert written value for some registers */
131
case SONIC_CRCT:
132
case SONIC_FAET:
133
case SONIC_MPT:
134
- s->regs[reg] = data ^ 0xffff;
135
+ s->regs[reg] = val ^ 0xffff;
136
break;
137
/* All other registers have no special contrainst */
138
default:
139
- s->regs[reg] = data;
140
+ s->regs[reg] = val;
141
}
142
143
if (reg == SONIC_WT0 || reg == SONIC_WT1) {
144
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
145
static const MemoryRegionOps dp8393x_ops = {
146
.read = dp8393x_read,
147
.write = dp8393x_write,
148
- .impl.min_access_size = 2,
149
- .impl.max_access_size = 2,
150
+ .impl.min_access_size = 4,
151
+ .impl.max_access_size = 4,
152
.endianness = DEVICE_NATIVE_ENDIAN,
153
};
154
22
155
--
23
--
156
2.5.0
24
2.7.4
157
25
158
26
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
According to the datasheet, section 3.4.4, "in 32-bit mode ... the SONIC
4
always writes long words".
5
6
Therefore, use the same technique for the 'in_use' field that is used
7
everywhere else, and write the full long word.
8
9
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
10
Tested-by: Laurent Vivier <laurent@vivier.eu>
11
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/dp8393x.c | 17 ++++++-----------
15
1 file changed, 6 insertions(+), 11 deletions(-)
16
17
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/dp8393x.c
20
+++ b/hw/net/dp8393x.c
21
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
22
return -1;
23
}
24
25
- /* XXX: Check byte ordering */
26
-
27
/* Check for EOL */
28
if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
29
/* Are we still in resource exhaustion? */
30
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
31
/* EOL detected */
32
s->regs[SONIC_ISR] |= SONIC_ISR_RDE;
33
} else {
34
- /* Clear in_use, but it is always 16bit wide */
35
- int offset = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width;
36
- if (s->big_endian && width == 2) {
37
- /* we need to adjust the offset of the 16bit field */
38
- offset += sizeof(uint16_t);
39
- }
40
- s->data[0] = 0;
41
- address_space_write(&s->as, offset, MEMTXATTRS_UNSPECIFIED,
42
- s->data, sizeof(uint16_t));
43
+ /* Clear in_use */
44
+ size = sizeof(uint16_t) * width;
45
+ address = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width;
46
+ dp8393x_put(s, width, 0, 0);
47
+ address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
48
+ s->data, size);
49
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
50
s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
51
s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) | (((s->regs[SONIC_RSC] & 0x00ff) + 1) & 0x00ff);
52
--
53
2.5.0
54
55
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
This function re-uses its 'size' argument as a scratch variable.
4
Instead, declare a local 'size' variable for that purpose so that the
5
function result doesn't get messed up.
6
7
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Tested-by: Laurent Vivier <laurent@vivier.eu>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
hw/net/dp8393x.c | 9 +++++----
13
1 file changed, 5 insertions(+), 4 deletions(-)
14
15
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/dp8393x.c
18
+++ b/hw/net/dp8393x.c
19
@@ -XXX,XX +XXX,XX @@ static int dp8393x_receive_filter(dp8393xState *s, const uint8_t * buf,
20
}
21
22
static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
23
- size_t size)
24
+ size_t pkt_size)
25
{
26
dp8393xState *s = qemu_get_nic_opaque(nc);
27
int packet_type;
28
uint32_t available, address;
29
- int width, rx_len = size;
30
+ int width, rx_len = pkt_size;
31
uint32_t checksum;
32
+ int size;
33
34
width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1;
35
36
s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER |
37
SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC);
38
39
- packet_type = dp8393x_receive_filter(s, buf, size);
40
+ packet_type = dp8393x_receive_filter(s, buf, pkt_size);
41
if (packet_type < 0) {
42
DPRINTF("packet not for netcard\n");
43
return -1;
44
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
45
/* Done */
46
dp8393x_update_irq(s);
47
48
- return size;
49
+ return pkt_size;
50
}
51
52
static void dp8393x_reset(DeviceState *dev)
53
--
54
2.5.0
55
56
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
Follow the algorithm given in the National Semiconductor DP83932C
4
datasheet in section 3.4.7:
5
6
At the next reception, the SONIC re-reads the last RXpkt.link field,
7
and updates its CRDA register to point to the next descriptor.
8
9
The chip is designed to allow the host to provide a new list of
10
descriptors in this way.
11
12
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
13
Tested-by: Laurent Vivier <laurent@vivier.eu>
14
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
---
17
hw/net/dp8393x.c | 11 +++++++----
18
1 file changed, 7 insertions(+), 4 deletions(-)
19
20
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/net/dp8393x.c
23
+++ b/hw/net/dp8393x.c
24
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
25
address = dp8393x_crda(s) + sizeof(uint16_t) * 5 * width;
26
address_space_read(&s->as, address, MEMTXATTRS_UNSPECIFIED,
27
s->data, size);
28
- if (dp8393x_get(s, width, 0) & SONIC_DESC_EOL) {
29
+ s->regs[SONIC_LLFA] = dp8393x_get(s, width, 0);
30
+ if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
31
/* Still EOL ; stop reception */
32
return -1;
33
- } else {
34
- s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
35
}
36
+ /* Link has been updated by host */
37
+ s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
38
}
39
40
/* Save current position */
41
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
42
MEMTXATTRS_UNSPECIFIED,
43
s->data, size);
44
45
- /* Move to next descriptor */
46
+ /* Check link field */
47
size = sizeof(uint16_t) * width;
48
address_space_read(&s->as,
49
dp8393x_crda(s) + sizeof(uint16_t) * 5 * width,
50
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
51
dp8393x_put(s, width, 0, 0);
52
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
53
s->data, size);
54
+
55
+ /* Move to next descriptor */
56
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
57
s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
58
s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) | (((s->regs[SONIC_RSC] & 0x00ff) + 1) & 0x00ff);
59
--
60
2.5.0
61
62
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
It doesn't make sense to clear the command register bit unless the
4
command was actually issued.
5
6
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Tested-by: Laurent Vivier <laurent@vivier.eu>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/net/dp8393x.c | 7 +++----
12
1 file changed, 3 insertions(+), 4 deletions(-)
13
14
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/dp8393x.c
17
+++ b/hw/net/dp8393x.c
18
@@ -XXX,XX +XXX,XX @@ static void dp8393x_do_read_rra(dp8393xState *s)
19
s->regs[SONIC_ISR] |= SONIC_ISR_RBE;
20
dp8393x_update_irq(s);
21
}
22
-
23
- /* Done */
24
- s->regs[SONIC_CR] &= ~SONIC_CR_RRRA;
25
}
26
27
static void dp8393x_do_software_reset(dp8393xState *s)
28
@@ -XXX,XX +XXX,XX @@ static void dp8393x_do_command(dp8393xState *s, uint16_t command)
29
dp8393x_do_start_timer(s);
30
if (command & SONIC_CR_RST)
31
dp8393x_do_software_reset(s);
32
- if (command & SONIC_CR_RRRA)
33
+ if (command & SONIC_CR_RRRA) {
34
dp8393x_do_read_rra(s);
35
+ s->regs[SONIC_CR] &= ~SONIC_CR_RRRA;
36
+ }
37
if (command & SONIC_CR_LCAM)
38
dp8393x_do_load_cam(s);
39
}
40
--
41
2.5.0
42
43
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
Add a bounds check to prevent a large packet from causing a buffer
4
overflow. This is defensive programming -- I haven't actually tried
5
sending an oversized packet or a jumbo ethernet frame.
6
7
The SONIC handles packets that are too big for the buffer by raising
8
the RBAE interrupt and dropping them. Linux uses that interrupt to
9
count dropped packets.
10
11
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
12
Tested-by: Laurent Vivier <laurent@vivier.eu>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
hw/net/dp8393x.c | 9 +++++++++
16
1 file changed, 9 insertions(+)
17
18
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/net/dp8393x.c
21
+++ b/hw/net/dp8393x.c
22
@@ -XXX,XX +XXX,XX @@ do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
23
#define SONIC_TCR_CRCI 0x2000
24
#define SONIC_TCR_PINT 0x8000
25
26
+#define SONIC_ISR_RBAE 0x0010
27
#define SONIC_ISR_RBE 0x0020
28
#define SONIC_ISR_RDE 0x0040
29
#define SONIC_ISR_TC 0x0080
30
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
31
s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER |
32
SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC);
33
34
+ if (pkt_size + 4 > dp8393x_rbwc(s) * 2) {
35
+ DPRINTF("oversize packet, pkt_size is %d\n", pkt_size);
36
+ s->regs[SONIC_ISR] |= SONIC_ISR_RBAE;
37
+ dp8393x_update_irq(s);
38
+ dp8393x_do_read_rra(s);
39
+ return pkt_size;
40
+ }
41
+
42
packet_type = dp8393x_receive_filter(s, buf, pkt_size);
43
if (packet_type < 0) {
44
DPRINTF("packet not for netcard\n");
45
--
46
2.5.0
47
48
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
A received packet consumes pkt_size bytes in the buffer and the frame
4
checksum that's appended to it consumes another 4 bytes. The Receive
5
Buffer Address register takes the former quantity into account but
6
not the latter. So the next packet written to the buffer overwrites
7
the frame checksum. Fix this.
8
9
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
10
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Tested-by: Laurent Vivier <laurent@vivier.eu>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/dp8393x.c | 1 +
15
1 file changed, 1 insertion(+)
16
17
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/dp8393x.c
20
+++ b/hw/net/dp8393x.c
21
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
22
address += rx_len;
23
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
24
&checksum, 4);
25
+ address += 4;
26
rx_len += 4;
27
s->regs[SONIC_CRBA1] = address >> 16;
28
s->regs[SONIC_CRBA0] = address & 0xffff;
29
--
30
2.5.0
31
32
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
Section 3.4.1 of the datasheet says,
4
5
The alignment of the RRA is confined to either word or long word
6
boundaries, depending upon the data width mode. In 16-bit mode,
7
the RRA must be aligned to a word boundary (A0 is always zero)
8
and in 32-bit mode, the RRA is aligned to a long word boundary
9
(A0 and A1 are always zero).
10
11
This constraint has been implemented for 16-bit mode; implement it
12
for 32-bit mode too.
13
14
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
15
Tested-by: Laurent Vivier <laurent@vivier.eu>
16
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
17
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
---
19
hw/net/dp8393x.c | 8 ++++++--
20
1 file changed, 6 insertions(+), 2 deletions(-)
21
22
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/hw/net/dp8393x.c
25
+++ b/hw/net/dp8393x.c
26
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
27
qemu_flush_queued_packets(qemu_get_queue(s->nic));
28
}
29
break;
30
- /* Ignore least significant bit */
31
+ /* The guest is required to store aligned pointers here */
32
case SONIC_RSA:
33
case SONIC_REA:
34
case SONIC_RRP:
35
case SONIC_RWP:
36
- s->regs[reg] = val & 0xfffe;
37
+ if (s->regs[SONIC_DCR] & SONIC_DCR_DW) {
38
+ s->regs[reg] = val & 0xfffc;
39
+ } else {
40
+ s->regs[reg] = val & 0xfffe;
41
+ }
42
break;
43
/* Invert written value for some registers */
44
case SONIC_CRCT:
45
--
46
2.5.0
47
48
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
The existing code has a bug where the Remaining Buffer Word Count (RBWC)
4
is calculated with a truncating division, which gives the wrong result
5
for odd-sized packets.
6
7
Section 1.4.1 of the datasheet says,
8
9
Once the end of the packet has been reached, the serializer will
10
fill out the last word (16-bit mode) or long word (32-bit mode)
11
if the last byte did not end on a word or long word boundary
12
respectively. The fill byte will be 0FFh.
13
14
Implement buffer padding so that buffer limits are correctly enforced.
15
16
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
17
Tested-by: Laurent Vivier <laurent@vivier.eu>
18
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
19
Signed-off-by: Jason Wang <jasowang@redhat.com>
20
---
21
hw/net/dp8393x.c | 39 ++++++++++++++++++++++++++++-----------
22
1 file changed, 28 insertions(+), 11 deletions(-)
23
24
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/net/dp8393x.c
27
+++ b/hw/net/dp8393x.c
28
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
29
dp8393xState *s = qemu_get_nic_opaque(nc);
30
int packet_type;
31
uint32_t available, address;
32
- int width, rx_len = pkt_size;
33
+ int width, rx_len, padded_len;
34
uint32_t checksum;
35
int size;
36
37
- width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1;
38
-
39
s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER |
40
SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC);
41
42
- if (pkt_size + 4 > dp8393x_rbwc(s) * 2) {
43
+ rx_len = pkt_size + sizeof(checksum);
44
+ if (s->regs[SONIC_DCR] & SONIC_DCR_DW) {
45
+ width = 2;
46
+ padded_len = ((rx_len - 1) | 3) + 1;
47
+ } else {
48
+ width = 1;
49
+ padded_len = ((rx_len - 1) | 1) + 1;
50
+ }
51
+
52
+ if (padded_len > dp8393x_rbwc(s) * 2) {
53
DPRINTF("oversize packet, pkt_size is %d\n", pkt_size);
54
s->regs[SONIC_ISR] |= SONIC_ISR_RBAE;
55
dp8393x_update_irq(s);
56
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
57
s->regs[SONIC_TRBA0] = s->regs[SONIC_CRBA0];
58
59
/* Calculate the ethernet checksum */
60
- checksum = cpu_to_le32(crc32(0, buf, rx_len));
61
+ checksum = cpu_to_le32(crc32(0, buf, pkt_size));
62
63
/* Put packet into RBA */
64
DPRINTF("Receive packet at %08x\n", dp8393x_crba(s));
65
address = dp8393x_crba(s);
66
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
67
- buf, rx_len);
68
- address += rx_len;
69
+ buf, pkt_size);
70
+ address += pkt_size;
71
+
72
+ /* Put frame checksum into RBA */
73
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
74
- &checksum, 4);
75
- address += 4;
76
- rx_len += 4;
77
+ &checksum, sizeof(checksum));
78
+ address += sizeof(checksum);
79
+
80
+ /* Pad short packets to keep pointers aligned */
81
+ if (rx_len < padded_len) {
82
+ size = padded_len - rx_len;
83
+ address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED,
84
+ (uint8_t *)"\xFF\xFF\xFF", size, 1);
85
+ address += size;
86
+ }
87
+
88
s->regs[SONIC_CRBA1] = address >> 16;
89
s->regs[SONIC_CRBA0] = address & 0xffff;
90
available = dp8393x_rbwc(s);
91
- available -= rx_len / 2;
92
+ available -= padded_len >> 1;
93
s->regs[SONIC_RBWC1] = available >> 16;
94
s->regs[SONIC_RBWC0] = available & 0xffff;
95
96
--
97
2.5.0
98
99
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
When the SONIC receives a packet into the last available descriptor, it
4
retains ownership of that descriptor for as long as necessary.
5
6
Section 3.4.7 of the datasheet says,
7
8
When the system appends more descriptors, the SONIC releases ownership
9
of the descriptor after writing 0000h to the RXpkt.in_use field.
10
11
The packet can now be processed by the host, so raise a PKTRX interrupt,
12
just like the normal case.
13
14
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
15
Tested-by: Laurent Vivier <laurent@vivier.eu>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
18
hw/net/dp8393x.c | 10 ++++++++++
19
1 file changed, 10 insertions(+)
20
21
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/net/dp8393x.c
24
+++ b/hw/net/dp8393x.c
25
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
26
return -1;
27
}
28
/* Link has been updated by host */
29
+
30
+ /* Clear in_use */
31
+ size = sizeof(uint16_t) * width;
32
+ address = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width;
33
+ dp8393x_put(s, width, 0, 0);
34
+ address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED,
35
+ (uint8_t *)s->data, size, 1);
36
+
37
+ /* Move to next descriptor */
38
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
39
+ s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
40
}
41
42
/* Save current position */
43
--
44
2.5.0
45
46
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
These operations need to take place regardless of whether or not
4
rx descriptors have been used up (that is, EOL flag was observed).
5
6
The algorithm is now the same for a packet that was withheld as for
7
a packet that was not.
8
9
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
10
Tested-by: Laurent Vivier <laurent@vivier.eu>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
hw/net/dp8393x.c | 12 +++++++-----
14
1 file changed, 7 insertions(+), 5 deletions(-)
15
16
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/net/dp8393x.c
19
+++ b/hw/net/dp8393x.c
20
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
21
/* Move to next descriptor */
22
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
23
s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
24
- s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) | (((s->regs[SONIC_RSC] & 0x00ff) + 1) & 0x00ff);
25
+ }
26
27
- if (s->regs[SONIC_RCR] & SONIC_RCR_LPKT) {
28
- /* Read next RRA */
29
- dp8393x_do_read_rra(s);
30
- }
31
+ s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) |
32
+ ((s->regs[SONIC_RSC] + 1) & 0x00ff);
33
+
34
+ if (s->regs[SONIC_RCR] & SONIC_RCR_LPKT) {
35
+ /* Read next RRA */
36
+ dp8393x_do_read_rra(s);
37
}
38
39
/* Done */
40
--
41
2.5.0
42
43
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
The jazzsonic driver in Linux uses the Silicon Revision register value
4
to probe the chip. The driver fails unless the SR register contains 4.
5
Unfortunately, reading this register in QEMU usually returns 0 because
6
the s->regs[] array gets wiped after a software reset.
7
8
Fixes: bd8f1ebce4 ("net/dp8393x: fix hardware reset")
9
Suggested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
hw/net/dp8393x.c | 2 +-
14
1 file changed, 1 insertion(+), 1 deletion(-)
15
16
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/net/dp8393x.c
19
+++ b/hw/net/dp8393x.c
20
@@ -XXX,XX +XXX,XX @@ static void dp8393x_reset(DeviceState *dev)
21
timer_del(s->watchdog);
22
23
memset(s->regs, 0, sizeof(s->regs));
24
+ s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux/mips */
25
s->regs[SONIC_CR] = SONIC_CR_RST | SONIC_CR_STP | SONIC_CR_RXDIS;
26
s->regs[SONIC_DCR] &= ~(SONIC_DCR_EXBUS | SONIC_DCR_LBR);
27
s->regs[SONIC_RCR] &= ~(SONIC_RCR_LB0 | SONIC_RCR_LB1 | SONIC_RCR_BRD | SONIC_RCR_RNT);
28
@@ -XXX,XX +XXX,XX @@ static void dp8393x_realize(DeviceState *dev, Error **errp)
29
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
30
31
s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s);
32
- s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */
33
34
memory_region_init_ram(&s->prom, OBJECT(dev),
35
"dp8393x-prom", SONIC_PROM_SIZE, &local_err);
36
--
37
2.5.0
38
39
diff view generated by jsdifflib
1
From: Finn Thain <fthain@telegraphics.com.au>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
Section 3.4.7 of the datasheet explains that,
3
For now, that method is supported only by Linux TAP.
4
Linux TAP uses TUNSETSTEERINGEBPF ioctl.
4
5
5
The RBE bit in the Interrupt Status register is set when the
6
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
6
SONIC finishes using the second to last receive buffer and reads
7
the last RRA descriptor. Actually, the SONIC is not truly out of
8
resources, but gives the system an early warning of an impending
9
out of resources condition.
10
11
RBE does not mean actual receive buffer exhaustion, and reception should
12
not be stopped. This is important because Linux will not check and clear
13
the RBE interrupt until it receives another packet. But that won't
14
happen if can_receive returns false. This bug causes the SONIC to become
15
deaf (until reset).
16
17
Fix this with a new flag to indicate actual receive buffer exhaustion.
18
19
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
20
Tested-by: Laurent Vivier <laurent@vivier.eu>
21
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
22
---
8
---
23
hw/net/dp8393x.c | 35 ++++++++++++++++++++++-------------
9
include/net/net.h | 2 ++
24
1 file changed, 22 insertions(+), 13 deletions(-)
10
net/tap-bsd.c | 5 +++++
11
net/tap-linux.c | 13 +++++++++++++
12
net/tap-solaris.c | 5 +++++
13
net/tap-stub.c | 5 +++++
14
net/tap.c | 9 +++++++++
15
net/tap_int.h | 1 +
16
7 files changed, 40 insertions(+)
25
17
26
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
18
diff --git a/include/net/net.h b/include/net/net.h
27
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/net/dp8393x.c
20
--- a/include/net/net.h
29
+++ b/hw/net/dp8393x.c
21
+++ b/include/net/net.h
30
@@ -XXX,XX +XXX,XX @@ typedef struct dp8393xState {
22
@@ -XXX,XX +XXX,XX @@ typedef int (SetVnetBE)(NetClientState *, bool);
31
/* Hardware */
23
typedef struct SocketReadState SocketReadState;
32
uint8_t it_shift;
24
typedef void (SocketReadStateFinalize)(SocketReadState *rs);
33
bool big_endian;
25
typedef void (NetAnnounce)(NetClientState *);
34
+ bool last_rba_is_full;
26
+typedef bool (SetSteeringEBPF)(NetClientState *, int);
35
qemu_irq irq;
27
36
#ifdef DEBUG_SONIC
28
typedef struct NetClientInfo {
37
int irq_level;
29
NetClientDriver type;
38
@@ -XXX,XX +XXX,XX @@ static void dp8393x_do_read_rra(dp8393xState *s)
30
@@ -XXX,XX +XXX,XX @@ typedef struct NetClientInfo {
39
s->regs[SONIC_RRP] = s->regs[SONIC_RSA];
31
SetVnetLE *set_vnet_le;
40
}
32
SetVnetBE *set_vnet_be;
41
33
NetAnnounce *announce;
42
- /* Check resource exhaustion */
34
+ SetSteeringEBPF *set_steering_ebpf;
43
+ /* Warn the host if CRBA now has the last available resource */
35
} NetClientInfo;
44
if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP])
36
45
{
37
struct NetClientState {
46
s->regs[SONIC_ISR] |= SONIC_ISR_RBE;
38
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
47
dp8393x_update_irq(s);
39
index XXXXXXX..XXXXXXX 100644
48
}
40
--- a/net/tap-bsd.c
41
+++ b/net/tap-bsd.c
42
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
43
{
44
return -1;
45
}
49
+
46
+
50
+ /* Allow packet reception */
47
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
51
+ s->last_rba_is_full = false;
48
+{
49
+ return -1;
50
+}
51
diff --git a/net/tap-linux.c b/net/tap-linux.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/net/tap-linux.c
54
+++ b/net/tap-linux.c
55
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
56
pstrcpy(ifname, sizeof(ifr.ifr_name), ifr.ifr_name);
57
return 0;
52
}
58
}
53
59
+
54
static void dp8393x_do_software_reset(dp8393xState *s)
60
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
55
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
61
+{
56
dp8393x_do_read_rra(s);
62
+ if (ioctl(fd, TUNSETSTEERINGEBPF, (void *) &prog_fd) != 0) {
57
}
63
+ error_report("Issue while setting TUNSETSTEERINGEBPF:"
58
dp8393x_update_irq(s);
64
+ " %s with fd: %d, prog_fd: %d",
59
- if (dp8393x_can_receive(s->nic->ncs)) {
65
+ strerror(errno), fd, prog_fd);
60
- qemu_flush_queued_packets(qemu_get_queue(s->nic));
66
+
61
- }
67
+ return -1;
62
break;
63
/* The guest is required to store aligned pointers here */
64
case SONIC_RSA:
65
@@ -XXX,XX +XXX,XX @@ static int dp8393x_can_receive(NetClientState *nc)
66
67
if (!(s->regs[SONIC_CR] & SONIC_CR_RXEN))
68
return 0;
69
- if (s->regs[SONIC_ISR] & SONIC_ISR_RBE)
70
- return 0;
71
return 1;
72
}
73
74
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
75
s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER |
76
SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC);
77
78
+ if (s->last_rba_is_full) {
79
+ return pkt_size;
80
+ }
68
+ }
81
+
69
+
82
rx_len = pkt_size + sizeof(checksum);
70
+ return 0;
83
if (s->regs[SONIC_DCR] & SONIC_DCR_DW) {
71
+}
84
width = 2;
72
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
85
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
73
index XXXXXXX..XXXXXXX 100644
86
DPRINTF("oversize packet, pkt_size is %d\n", pkt_size);
74
--- a/net/tap-solaris.c
87
s->regs[SONIC_ISR] |= SONIC_ISR_RBAE;
75
+++ b/net/tap-solaris.c
88
dp8393x_update_irq(s);
76
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
89
- dp8393x_do_read_rra(s);
77
{
90
- return pkt_size;
78
return -1;
91
+ s->regs[SONIC_RCR] |= SONIC_RCR_LPKT;
79
}
92
+ goto done;
93
}
94
95
packet_type = dp8393x_receive_filter(s, buf, pkt_size);
96
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
97
s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
98
}
99
100
+ dp8393x_update_irq(s);
101
+
80
+
102
s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) |
81
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
103
((s->regs[SONIC_RSC] + 1) & 0x00ff);
82
+{
104
83
+ return -1;
105
+done:
84
+}
85
diff --git a/net/tap-stub.c b/net/tap-stub.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/net/tap-stub.c
88
+++ b/net/tap-stub.c
89
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
90
{
91
return -1;
92
}
106
+
93
+
107
if (s->regs[SONIC_RCR] & SONIC_RCR_LPKT) {
94
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
108
- /* Read next RRA */
95
+{
109
- dp8393x_do_read_rra(s);
96
+ return -1;
110
+ if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP]) {
97
+}
111
+ /* Stop packet reception */
98
diff --git a/net/tap.c b/net/tap.c
112
+ s->last_rba_is_full = true;
99
index XXXXXXX..XXXXXXX 100644
113
+ } else {
100
--- a/net/tap.c
114
+ /* Read next resource */
101
+++ b/net/tap.c
115
+ dp8393x_do_read_rra(s);
102
@@ -XXX,XX +XXX,XX @@ static void tap_poll(NetClientState *nc, bool enable)
116
+ }
103
tap_write_poll(s, enable);
117
}
118
119
- /* Done */
120
- dp8393x_update_irq(s);
121
-
122
return pkt_size;
123
}
104
}
124
105
106
+static bool tap_set_steering_ebpf(NetClientState *nc, int prog_fd)
107
+{
108
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
109
+ assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
110
+
111
+ return tap_fd_set_steering_ebpf(s->fd, prog_fd) == 0;
112
+}
113
+
114
int tap_get_fd(NetClientState *nc)
115
{
116
TAPState *s = DO_UPCAST(TAPState, nc, nc);
117
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_tap_info = {
118
.set_vnet_hdr_len = tap_set_vnet_hdr_len,
119
.set_vnet_le = tap_set_vnet_le,
120
.set_vnet_be = tap_set_vnet_be,
121
+ .set_steering_ebpf = tap_set_steering_ebpf,
122
};
123
124
static TAPState *net_tap_fd_init(NetClientState *peer,
125
diff --git a/net/tap_int.h b/net/tap_int.h
126
index XXXXXXX..XXXXXXX 100644
127
--- a/net/tap_int.h
128
+++ b/net/tap_int.h
129
@@ -XXX,XX +XXX,XX @@ int tap_fd_set_vnet_be(int fd, int vnet_is_be);
130
int tap_fd_enable(int fd);
131
int tap_fd_disable(int fd);
132
int tap_fd_get_ifname(int fd, char *ifname);
133
+int tap_fd_set_steering_ebpf(int fd, int prog_fd);
134
135
#endif /* NET_TAP_INT_H */
125
--
136
--
126
2.5.0
137
2.7.4
127
138
128
139
diff view generated by jsdifflib
Deleted patch
1
From: Yuri Benditovich <yuri.benditovich@daynix.com>
2
1
3
https://bugzilla.redhat.com/show_bug.cgi?id=1787142
4
The emulation issues hw_error if PSRCTL register
5
is written, for example, with zero value.
6
Such configuration does not present any problem when
7
DTYP bits of RCTL register define legacy format of
8
transfer descriptors. Current commit discards check
9
for BSIZE0 and BSIZE1 when legacy mode is used.
10
11
Acked-by: Dmitry Fleytman <dmitry.fleytman@gmail.com>
12
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
hw/net/e1000e_core.c | 13 ++++++++-----
16
1 file changed, 8 insertions(+), 5 deletions(-)
17
18
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/net/e1000e_core.c
21
+++ b/hw/net/e1000e_core.c
22
@@ -XXX,XX +XXX,XX @@ e1000e_set_eitr(E1000ECore *core, int index, uint32_t val)
23
static void
24
e1000e_set_psrctl(E1000ECore *core, int index, uint32_t val)
25
{
26
- if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) {
27
- hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero");
28
- }
29
+ if (core->mac[RCTL] & E1000_RCTL_DTYP_MASK) {
30
+
31
+ if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) {
32
+ hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero");
33
+ }
34
35
- if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) {
36
- hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero");
37
+ if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) {
38
+ hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero");
39
+ }
40
}
41
42
core->mac[PSRCTL] = val;
43
--
44
2.5.0
45
46
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
The L2TPv3 RFC number is 3931:
3
RSS program and Makefile to build it.
4
https://tools.ietf.org/html/rfc3931
4
The bpftool is used to generate the '.h' file.
5
The data in that file may be loaded by libbpf.
6
EBPF compilation is not required for building qemu.
7
You can use the Makefile if you need to regenerate rss.bpf.skeleton.h.
5
8
6
Reported-by: Henrik Johansson <henrikjohansson@rocketmail.com>
9
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
7
Reviewed-by: Stefan Weil <sw@weilnetz.de>
10
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
12
---
11
qemu-options.hx | 4 ++--
13
tools/ebpf/Makefile.ebpf | 21 ++
12
1 file changed, 2 insertions(+), 2 deletions(-)
14
tools/ebpf/rss.bpf.c | 571 +++++++++++++++++++++++++++++++++++++++++++++++
15
2 files changed, 592 insertions(+)
16
create mode 100755 tools/ebpf/Makefile.ebpf
17
create mode 100644 tools/ebpf/rss.bpf.c
13
18
14
diff --git a/qemu-options.hx b/qemu-options.hx
19
diff --git a/tools/ebpf/Makefile.ebpf b/tools/ebpf/Makefile.ebpf
15
index XXXXXXX..XXXXXXX 100644
20
new file mode 100755
16
--- a/qemu-options.hx
21
index XXXXXXX..XXXXXXX
17
+++ b/qemu-options.hx
22
--- /dev/null
18
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
23
+++ b/tools/ebpf/Makefile.ebpf
19
" Linux kernel 3.3+ as well as most routers can talk\n"
24
@@ -XXX,XX +XXX,XX @@
20
" L2TPv3. This transport allows connecting a VM to a VM,\n"
25
+OBJS = rss.bpf.o
21
" VM to a router and even VM to Host. It is a nearly-universal\n"
26
+
22
- " standard (RFC3391). Note - this implementation uses static\n"
27
+LLC ?= llc
23
+ " standard (RFC3931). Note - this implementation uses static\n"
28
+CLANG ?= clang
24
" pre-configured tunnels (same as the Linux kernel).\n"
29
+INC_FLAGS = `$(CLANG) -print-file-name=include`
25
" use 'src=' to specify source address\n"
30
+EXTRA_CFLAGS ?= -O2 -emit-llvm -fno-stack-protector
26
" use 'dst=' to specify destination address\n"
31
+
27
@@ -XXX,XX +XXX,XX @@ Example (send packets from host's 1.2.3.4):
32
+all: $(OBJS)
28
@end example
33
+
29
34
+.PHONY: clean
30
@item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
35
+
31
-Configure a L2TPv3 pseudowire host network backend. L2TPv3 (RFC3391) is a
36
+clean:
32
+Configure a L2TPv3 pseudowire host network backend. L2TPv3 (RFC3931) is a
37
+    rm -f $(OBJS)
33
popular protocol to transport Ethernet (and other Layer 2) data frames between
38
+
34
two systems. It is present in routers, firewalls and the Linux kernel
39
+$(OBJS): %.o:%.c
35
(from version 3.3 onwards).
40
+    $(CLANG) $(INC_FLAGS) \
41
+ -D__KERNEL__ -D__ASM_SYSREG_H \
42
+ -I../include $(LINUXINCLUDE) \
43
+ $(EXTRA_CFLAGS) -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
44
+    bpftool gen skeleton rss.bpf.o > rss.bpf.skeleton.h
45
+    cp rss.bpf.skeleton.h ../../ebpf/
46
diff --git a/tools/ebpf/rss.bpf.c b/tools/ebpf/rss.bpf.c
47
new file mode 100644
48
index XXXXXXX..XXXXXXX
49
--- /dev/null
50
+++ b/tools/ebpf/rss.bpf.c
51
@@ -XXX,XX +XXX,XX @@
52
+/*
53
+ * eBPF RSS program
54
+ *
55
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
56
+ *
57
+ * Authors:
58
+ * Andrew Melnychenko <andrew@daynix.com>
59
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
60
+ *
61
+ * This work is licensed under the terms of the GNU GPL, version 2. See
62
+ * the COPYING file in the top-level directory.
63
+ *
64
+ * Prepare:
65
+ * Requires llvm, clang, bpftool, linux kernel tree
66
+ *
67
+ * Build rss.bpf.skeleton.h:
68
+ * make -f Makefile.ebpf clean all
69
+ */
70
+
71
+#include <stddef.h>
72
+#include <stdbool.h>
73
+#include <linux/bpf.h>
74
+
75
+#include <linux/in.h>
76
+#include <linux/if_ether.h>
77
+#include <linux/ip.h>
78
+#include <linux/ipv6.h>
79
+
80
+#include <linux/udp.h>
81
+#include <linux/tcp.h>
82
+
83
+#include <bpf/bpf_helpers.h>
84
+#include <bpf/bpf_endian.h>
85
+#include <linux/virtio_net.h>
86
+
87
+#define INDIRECTION_TABLE_SIZE 128
88
+#define HASH_CALCULATION_BUFFER_SIZE 36
89
+
90
+struct rss_config_t {
91
+ __u8 redirect;
92
+ __u8 populate_hash;
93
+ __u32 hash_types;
94
+ __u16 indirections_len;
95
+ __u16 default_queue;
96
+} __attribute__((packed));
97
+
98
+struct toeplitz_key_data_t {
99
+ __u32 leftmost_32_bits;
100
+ __u8 next_byte[HASH_CALCULATION_BUFFER_SIZE];
101
+};
102
+
103
+struct packet_hash_info_t {
104
+ __u8 is_ipv4;
105
+ __u8 is_ipv6;
106
+ __u8 is_udp;
107
+ __u8 is_tcp;
108
+ __u8 is_ipv6_ext_src;
109
+ __u8 is_ipv6_ext_dst;
110
+ __u8 is_fragmented;
111
+
112
+ __u16 src_port;
113
+ __u16 dst_port;
114
+
115
+ union {
116
+ struct {
117
+ __be32 in_src;
118
+ __be32 in_dst;
119
+ };
120
+
121
+ struct {
122
+ struct in6_addr in6_src;
123
+ struct in6_addr in6_dst;
124
+ struct in6_addr in6_ext_src;
125
+ struct in6_addr in6_ext_dst;
126
+ };
127
+ };
128
+};
129
+
130
+struct bpf_map_def SEC("maps")
131
+tap_rss_map_configurations = {
132
+ .type = BPF_MAP_TYPE_ARRAY,
133
+ .key_size = sizeof(__u32),
134
+ .value_size = sizeof(struct rss_config_t),
135
+ .max_entries = 1,
136
+};
137
+
138
+struct bpf_map_def SEC("maps")
139
+tap_rss_map_toeplitz_key = {
140
+ .type = BPF_MAP_TYPE_ARRAY,
141
+ .key_size = sizeof(__u32),
142
+ .value_size = sizeof(struct toeplitz_key_data_t),
143
+ .max_entries = 1,
144
+};
145
+
146
+struct bpf_map_def SEC("maps")
147
+tap_rss_map_indirection_table = {
148
+ .type = BPF_MAP_TYPE_ARRAY,
149
+ .key_size = sizeof(__u32),
150
+ .value_size = sizeof(__u16),
151
+ .max_entries = INDIRECTION_TABLE_SIZE,
152
+};
153
+
154
+static inline void net_rx_rss_add_chunk(__u8 *rss_input, size_t *bytes_written,
155
+ const void *ptr, size_t size) {
156
+ __builtin_memcpy(&rss_input[*bytes_written], ptr, size);
157
+ *bytes_written += size;
158
+}
159
+
160
+static inline
161
+void net_toeplitz_add(__u32 *result,
162
+ __u8 *input,
163
+ __u32 len
164
+ , struct toeplitz_key_data_t *key) {
165
+
166
+ __u32 accumulator = *result;
167
+ __u32 leftmost_32_bits = key->leftmost_32_bits;
168
+ __u32 byte;
169
+
170
+ for (byte = 0; byte < HASH_CALCULATION_BUFFER_SIZE; byte++) {
171
+ __u8 input_byte = input[byte];
172
+ __u8 key_byte = key->next_byte[byte];
173
+ __u8 bit;
174
+
175
+ for (bit = 0; bit < 8; bit++) {
176
+ if (input_byte & (1 << 7)) {
177
+ accumulator ^= leftmost_32_bits;
178
+ }
179
+
180
+ leftmost_32_bits =
181
+ (leftmost_32_bits << 1) | ((key_byte & (1 << 7)) >> 7);
182
+
183
+ input_byte <<= 1;
184
+ key_byte <<= 1;
185
+ }
186
+ }
187
+
188
+ *result = accumulator;
189
+}
190
+
191
+
192
+static inline int ip6_extension_header_type(__u8 hdr_type)
193
+{
194
+ switch (hdr_type) {
195
+ case IPPROTO_HOPOPTS:
196
+ case IPPROTO_ROUTING:
197
+ case IPPROTO_FRAGMENT:
198
+ case IPPROTO_ICMPV6:
199
+ case IPPROTO_NONE:
200
+ case IPPROTO_DSTOPTS:
201
+ case IPPROTO_MH:
202
+ return 1;
203
+ default:
204
+ return 0;
205
+ }
206
+}
207
+/*
208
+ * According to
209
+ * https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml
210
+ * we expect that there would be no more than 11 extensions in IPv6 header,
211
+ * also there are 27 TLV options for Destination and Hop-by-hop extensions.
212
+ * Need to choose reasonable amount of maximum extensions/options we may
213
+ * check to find ext src/dst.
214
+ */
215
+#define IP6_EXTENSIONS_COUNT 11
216
+#define IP6_OPTIONS_COUNT 30
217
+
218
+static inline int parse_ipv6_ext(struct __sk_buff *skb,
219
+ struct packet_hash_info_t *info,
220
+ __u8 *l4_protocol, size_t *l4_offset)
221
+{
222
+ int err = 0;
223
+
224
+ if (!ip6_extension_header_type(*l4_protocol)) {
225
+ return 0;
226
+ }
227
+
228
+ struct ipv6_opt_hdr ext_hdr = {};
229
+
230
+ for (unsigned int i = 0; i < IP6_EXTENSIONS_COUNT; ++i) {
231
+
232
+ err = bpf_skb_load_bytes_relative(skb, *l4_offset, &ext_hdr,
233
+ sizeof(ext_hdr), BPF_HDR_START_NET);
234
+ if (err) {
235
+ goto error;
236
+ }
237
+
238
+ if (*l4_protocol == IPPROTO_ROUTING) {
239
+ struct ipv6_rt_hdr ext_rt = {};
240
+
241
+ err = bpf_skb_load_bytes_relative(skb, *l4_offset, &ext_rt,
242
+ sizeof(ext_rt), BPF_HDR_START_NET);
243
+ if (err) {
244
+ goto error;
245
+ }
246
+
247
+ if ((ext_rt.type == IPV6_SRCRT_TYPE_2) &&
248
+ (ext_rt.hdrlen == sizeof(struct in6_addr) / 8) &&
249
+ (ext_rt.segments_left == 1)) {
250
+
251
+ err = bpf_skb_load_bytes_relative(skb,
252
+ *l4_offset + offsetof(struct rt2_hdr, addr),
253
+ &info->in6_ext_dst, sizeof(info->in6_ext_dst),
254
+ BPF_HDR_START_NET);
255
+ if (err) {
256
+ goto error;
257
+ }
258
+
259
+ info->is_ipv6_ext_dst = 1;
260
+ }
261
+
262
+ } else if (*l4_protocol == IPPROTO_DSTOPTS) {
263
+ struct ipv6_opt_t {
264
+ __u8 type;
265
+ __u8 length;
266
+ } __attribute__((packed)) opt = {};
267
+
268
+ size_t opt_offset = sizeof(ext_hdr);
269
+
270
+ for (unsigned int j = 0; j < IP6_OPTIONS_COUNT; ++j) {
271
+ err = bpf_skb_load_bytes_relative(skb, *l4_offset + opt_offset,
272
+ &opt, sizeof(opt), BPF_HDR_START_NET);
273
+ if (err) {
274
+ goto error;
275
+ }
276
+
277
+ if (opt.type == IPV6_TLV_HAO) {
278
+ err = bpf_skb_load_bytes_relative(skb,
279
+ *l4_offset + opt_offset
280
+ + offsetof(struct ipv6_destopt_hao, addr),
281
+ &info->in6_ext_src, sizeof(info->in6_ext_src),
282
+ BPF_HDR_START_NET);
283
+ if (err) {
284
+ goto error;
285
+ }
286
+
287
+ info->is_ipv6_ext_src = 1;
288
+ break;
289
+ }
290
+
291
+ opt_offset += (opt.type == IPV6_TLV_PAD1) ?
292
+ 1 : opt.length + sizeof(opt);
293
+
294
+ if (opt_offset + 1 >= ext_hdr.hdrlen * 8) {
295
+ break;
296
+ }
297
+ }
298
+ } else if (*l4_protocol == IPPROTO_FRAGMENT) {
299
+ info->is_fragmented = true;
300
+ }
301
+
302
+ *l4_protocol = ext_hdr.nexthdr;
303
+ *l4_offset += (ext_hdr.hdrlen + 1) * 8;
304
+
305
+ if (!ip6_extension_header_type(ext_hdr.nexthdr)) {
306
+ return 0;
307
+ }
308
+ }
309
+
310
+ return 0;
311
+error:
312
+ return err;
313
+}
314
+
315
+static __be16 parse_eth_type(struct __sk_buff *skb)
316
+{
317
+ unsigned int offset = 12;
318
+ __be16 ret = 0;
319
+ int err = 0;
320
+
321
+ err = bpf_skb_load_bytes_relative(skb, offset, &ret, sizeof(ret),
322
+ BPF_HDR_START_MAC);
323
+ if (err) {
324
+ return 0;
325
+ }
326
+
327
+ switch (bpf_ntohs(ret)) {
328
+ case ETH_P_8021AD:
329
+ offset += 4;
330
+ case ETH_P_8021Q:
331
+ offset += 4;
332
+ err = bpf_skb_load_bytes_relative(skb, offset, &ret, sizeof(ret),
333
+ BPF_HDR_START_MAC);
334
+ default:
335
+ break;
336
+ }
337
+
338
+ if (err) {
339
+ return 0;
340
+ }
341
+
342
+ return ret;
343
+}
344
+
345
+static inline int parse_packet(struct __sk_buff *skb,
346
+ struct packet_hash_info_t *info)
347
+{
348
+ int err = 0;
349
+
350
+ if (!info || !skb) {
351
+ return -1;
352
+ }
353
+
354
+ size_t l4_offset = 0;
355
+ __u8 l4_protocol = 0;
356
+ __u16 l3_protocol = bpf_ntohs(parse_eth_type(skb));
357
+ if (l3_protocol == 0) {
358
+ err = -1;
359
+ goto error;
360
+ }
361
+
362
+ if (l3_protocol == ETH_P_IP) {
363
+ info->is_ipv4 = 1;
364
+
365
+ struct iphdr ip = {};
366
+ err = bpf_skb_load_bytes_relative(skb, 0, &ip, sizeof(ip),
367
+ BPF_HDR_START_NET);
368
+ if (err) {
369
+ goto error;
370
+ }
371
+
372
+ info->in_src = ip.saddr;
373
+ info->in_dst = ip.daddr;
374
+ info->is_fragmented = !!ip.frag_off;
375
+
376
+ l4_protocol = ip.protocol;
377
+ l4_offset = ip.ihl * 4;
378
+ } else if (l3_protocol == ETH_P_IPV6) {
379
+ info->is_ipv6 = 1;
380
+
381
+ struct ipv6hdr ip6 = {};
382
+ err = bpf_skb_load_bytes_relative(skb, 0, &ip6, sizeof(ip6),
383
+ BPF_HDR_START_NET);
384
+ if (err) {
385
+ goto error;
386
+ }
387
+
388
+ info->in6_src = ip6.saddr;
389
+ info->in6_dst = ip6.daddr;
390
+
391
+ l4_protocol = ip6.nexthdr;
392
+ l4_offset = sizeof(ip6);
393
+
394
+ err = parse_ipv6_ext(skb, info, &l4_protocol, &l4_offset);
395
+ if (err) {
396
+ goto error;
397
+ }
398
+ }
399
+
400
+ if (l4_protocol != 0 && !info->is_fragmented) {
401
+ if (l4_protocol == IPPROTO_TCP) {
402
+ info->is_tcp = 1;
403
+
404
+ struct tcphdr tcp = {};
405
+ err = bpf_skb_load_bytes_relative(skb, l4_offset, &tcp, sizeof(tcp),
406
+ BPF_HDR_START_NET);
407
+ if (err) {
408
+ goto error;
409
+ }
410
+
411
+ info->src_port = tcp.source;
412
+ info->dst_port = tcp.dest;
413
+ } else if (l4_protocol == IPPROTO_UDP) { /* TODO: add udplite? */
414
+ info->is_udp = 1;
415
+
416
+ struct udphdr udp = {};
417
+ err = bpf_skb_load_bytes_relative(skb, l4_offset, &udp, sizeof(udp),
418
+ BPF_HDR_START_NET);
419
+ if (err) {
420
+ goto error;
421
+ }
422
+
423
+ info->src_port = udp.source;
424
+ info->dst_port = udp.dest;
425
+ }
426
+ }
427
+
428
+ return 0;
429
+
430
+error:
431
+ return err;
432
+}
433
+
434
+static inline __u32 calculate_rss_hash(struct __sk_buff *skb,
435
+ struct rss_config_t *config, struct toeplitz_key_data_t *toe)
436
+{
437
+ __u8 rss_input[HASH_CALCULATION_BUFFER_SIZE] = {};
438
+ size_t bytes_written = 0;
439
+ __u32 result = 0;
440
+ int err = 0;
441
+ struct packet_hash_info_t packet_info = {};
442
+
443
+ err = parse_packet(skb, &packet_info);
444
+ if (err) {
445
+ return 0;
446
+ }
447
+
448
+ if (packet_info.is_ipv4) {
449
+ if (packet_info.is_tcp &&
450
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
451
+
452
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
453
+ &packet_info.in_src,
454
+ sizeof(packet_info.in_src));
455
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
456
+ &packet_info.in_dst,
457
+ sizeof(packet_info.in_dst));
458
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
459
+ &packet_info.src_port,
460
+ sizeof(packet_info.src_port));
461
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
462
+ &packet_info.dst_port,
463
+ sizeof(packet_info.dst_port));
464
+ } else if (packet_info.is_udp &&
465
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
466
+
467
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
468
+ &packet_info.in_src,
469
+ sizeof(packet_info.in_src));
470
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
471
+ &packet_info.in_dst,
472
+ sizeof(packet_info.in_dst));
473
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
474
+ &packet_info.src_port,
475
+ sizeof(packet_info.src_port));
476
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
477
+ &packet_info.dst_port,
478
+ sizeof(packet_info.dst_port));
479
+ } else if (config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
480
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
481
+ &packet_info.in_src,
482
+ sizeof(packet_info.in_src));
483
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
484
+ &packet_info.in_dst,
485
+ sizeof(packet_info.in_dst));
486
+ }
487
+ } else if (packet_info.is_ipv6) {
488
+ if (packet_info.is_tcp &&
489
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
490
+
491
+ if (packet_info.is_ipv6_ext_src &&
492
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
493
+
494
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
495
+ &packet_info.in6_ext_src,
496
+ sizeof(packet_info.in6_ext_src));
497
+ } else {
498
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
499
+ &packet_info.in6_src,
500
+ sizeof(packet_info.in6_src));
501
+ }
502
+ if (packet_info.is_ipv6_ext_dst &&
503
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
504
+
505
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
506
+ &packet_info.in6_ext_dst,
507
+ sizeof(packet_info.in6_ext_dst));
508
+ } else {
509
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
510
+ &packet_info.in6_dst,
511
+ sizeof(packet_info.in6_dst));
512
+ }
513
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
514
+ &packet_info.src_port,
515
+ sizeof(packet_info.src_port));
516
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
517
+ &packet_info.dst_port,
518
+ sizeof(packet_info.dst_port));
519
+ } else if (packet_info.is_udp &&
520
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
521
+
522
+ if (packet_info.is_ipv6_ext_src &&
523
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
524
+
525
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
526
+ &packet_info.in6_ext_src,
527
+ sizeof(packet_info.in6_ext_src));
528
+ } else {
529
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
530
+ &packet_info.in6_src,
531
+ sizeof(packet_info.in6_src));
532
+ }
533
+ if (packet_info.is_ipv6_ext_dst &&
534
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
535
+
536
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
537
+ &packet_info.in6_ext_dst,
538
+ sizeof(packet_info.in6_ext_dst));
539
+ } else {
540
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
541
+ &packet_info.in6_dst,
542
+ sizeof(packet_info.in6_dst));
543
+ }
544
+
545
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
546
+ &packet_info.src_port,
547
+ sizeof(packet_info.src_port));
548
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
549
+ &packet_info.dst_port,
550
+ sizeof(packet_info.dst_port));
551
+
552
+ } else if (config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
553
+ if (packet_info.is_ipv6_ext_src &&
554
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
555
+
556
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
557
+ &packet_info.in6_ext_src,
558
+ sizeof(packet_info.in6_ext_src));
559
+ } else {
560
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
561
+ &packet_info.in6_src,
562
+ sizeof(packet_info.in6_src));
563
+ }
564
+ if (packet_info.is_ipv6_ext_dst &&
565
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
566
+
567
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
568
+ &packet_info.in6_ext_dst,
569
+ sizeof(packet_info.in6_ext_dst));
570
+ } else {
571
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
572
+ &packet_info.in6_dst,
573
+ sizeof(packet_info.in6_dst));
574
+ }
575
+ }
576
+ }
577
+
578
+ if (bytes_written) {
579
+ net_toeplitz_add(&result, rss_input, bytes_written, toe);
580
+ }
581
+
582
+ return result;
583
+}
584
+
585
+SEC("tun_rss_steering")
586
+int tun_rss_steering_prog(struct __sk_buff *skb)
587
+{
588
+
589
+ struct rss_config_t *config;
590
+ struct toeplitz_key_data_t *toe;
591
+
592
+ __u32 key = 0;
593
+ __u32 hash = 0;
594
+
595
+ config = bpf_map_lookup_elem(&tap_rss_map_configurations, &key);
596
+ toe = bpf_map_lookup_elem(&tap_rss_map_toeplitz_key, &key);
597
+
598
+ if (config && toe) {
599
+ if (!config->redirect) {
600
+ return config->default_queue;
601
+ }
602
+
603
+ hash = calculate_rss_hash(skb, config, toe);
604
+ if (hash) {
605
+ __u32 table_idx = hash % config->indirections_len;
606
+ __u16 *queue = 0;
607
+
608
+ queue = bpf_map_lookup_elem(&tap_rss_map_indirection_table,
609
+ &table_idx);
610
+
611
+ if (queue) {
612
+ return *queue;
613
+ }
614
+ }
615
+
616
+ return config->default_queue;
617
+ }
618
+
619
+ return -1;
620
+}
621
+
622
+char _license[] SEC("license") = "GPL v2";
36
--
623
--
37
2.5.0
624
2.7.4
38
625
39
626
diff view generated by jsdifflib
1
From: Yuri Benditovich <yuri.benditovich@daynix.com>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
Add support for following hash types:
3
Added function that loads RSS eBPF program.
4
IPV6 TCP with extension headers
4
Added stub functions for RSS eBPF loader.
5
IPV4 UDP
5
Added meson and configuration options.
6
IPV6 UDP
6
7
IPV6 UDP with extension headers
7
By default, the eBPF feature is enabled if libbpf is present in the build system.
8
libbpf is checked for in the configure shell script and the meson script.
8
9
9
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
10
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
10
Acked-by: Dmitry Fleytman <dmitry.fleytman@gmail.com>
11
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
---
13
hw/net/net_rx_pkt.c | 42 ++++++++++++++++++++++++++++++++++++++++++
14
configure | 8 +-
14
hw/net/net_rx_pkt.h | 6 +++++-
15
ebpf/ebpf_rss-stub.c | 40 +++++
15
hw/net/trace-events | 4 ++++
16
ebpf/ebpf_rss.c | 165 ++++++++++++++++++
16
3 files changed, 51 insertions(+), 1 deletion(-)
17
ebpf/ebpf_rss.h | 44 +++++
18
ebpf/meson.build | 1 +
19
ebpf/rss.bpf.skeleton.h | 431 ++++++++++++++++++++++++++++++++++++++++++++++++
20
ebpf/trace-events | 4 +
21
ebpf/trace.h | 1 +
22
meson.build | 23 +++
23
meson_options.txt | 2 +
24
10 files changed, 718 insertions(+), 1 deletion(-)
25
create mode 100644 ebpf/ebpf_rss-stub.c
26
create mode 100644 ebpf/ebpf_rss.c
27
create mode 100644 ebpf/ebpf_rss.h
28
create mode 100644 ebpf/meson.build
29
create mode 100644 ebpf/rss.bpf.skeleton.h
30
create mode 100644 ebpf/trace-events
31
create mode 100644 ebpf/trace.h
17
32
18
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
33
diff --git a/configure b/configure
34
index XXXXXXX..XXXXXXX 100755
35
--- a/configure
36
+++ b/configure
37
@@ -XXX,XX +XXX,XX @@ vhost_vsock="$default_feature"
38
vhost_user="no"
39
vhost_user_blk_server="auto"
40
vhost_user_fs="$default_feature"
41
+bpf="auto"
42
kvm="auto"
43
hax="auto"
44
hvf="auto"
45
@@ -XXX,XX +XXX,XX @@ for opt do
46
;;
47
--enable-membarrier) membarrier="yes"
48
;;
49
+ --disable-bpf) bpf="disabled"
50
+ ;;
51
+ --enable-bpf) bpf="enabled"
52
+ ;;
53
--disable-blobs) blobs="false"
54
;;
55
--with-pkgversion=*) pkgversion="$optarg"
56
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available
57
vhost-user vhost-user backend support
58
vhost-user-blk-server vhost-user-blk server support
59
vhost-vdpa vhost-vdpa kernel backend support
60
+ bpf BPF kernel support
61
spice spice
62
spice-protocol spice-protocol
63
rbd rados block device (rbd)
64
@@ -XXX,XX +XXX,XX @@ if test "$skip_meson" = no; then
65
-Dattr=$attr -Ddefault_devices=$default_devices \
66
-Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \
67
-Dvhost_user_blk_server=$vhost_user_blk_server -Dmultiprocess=$multiprocess \
68
- -Dfuse=$fuse -Dfuse_lseek=$fuse_lseek -Dguest_agent_msi=$guest_agent_msi \
69
+ -Dfuse=$fuse -Dfuse_lseek=$fuse_lseek -Dguest_agent_msi=$guest_agent_msi -Dbpf=$bpf\
70
$(if test "$default_features" = no; then echo "-Dauto_features=disabled"; fi) \
71
    -Dtcg_interpreter=$tcg_interpreter \
72
$cross_arg \
73
diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c
74
new file mode 100644
75
index XXXXXXX..XXXXXXX
76
--- /dev/null
77
+++ b/ebpf/ebpf_rss-stub.c
78
@@ -XXX,XX +XXX,XX @@
79
+/*
80
+ * eBPF RSS stub file
81
+ *
82
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
83
+ *
84
+ * Authors:
85
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
86
+ *
87
+ * This work is licensed under the terms of the GNU GPL, version 2. See
88
+ * the COPYING file in the top-level directory.
89
+ */
90
+
91
+#include "qemu/osdep.h"
92
+#include "ebpf/ebpf_rss.h"
93
+
94
+void ebpf_rss_init(struct EBPFRSSContext *ctx)
95
+{
96
+
97
+}
98
+
99
+bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
100
+{
101
+ return false;
102
+}
103
+
104
+bool ebpf_rss_load(struct EBPFRSSContext *ctx)
105
+{
106
+ return false;
107
+}
108
+
109
+bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
110
+ uint16_t *indirections_table, uint8_t *toeplitz_key)
111
+{
112
+ return false;
113
+}
114
+
115
+void ebpf_rss_unload(struct EBPFRSSContext *ctx)
116
+{
117
+
118
+}
119
diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
120
new file mode 100644
121
index XXXXXXX..XXXXXXX
122
--- /dev/null
123
+++ b/ebpf/ebpf_rss.c
124
@@ -XXX,XX +XXX,XX @@
125
+/*
126
+ * eBPF RSS loader
127
+ *
128
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
129
+ *
130
+ * Authors:
131
+ * Andrew Melnychenko <andrew@daynix.com>
132
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
133
+ *
134
+ * This work is licensed under the terms of the GNU GPL, version 2. See
135
+ * the COPYING file in the top-level directory.
136
+ */
137
+
138
+#include "qemu/osdep.h"
139
+#include "qemu/error-report.h"
140
+
141
+#include <bpf/libbpf.h>
142
+#include <bpf/bpf.h>
143
+
144
+#include "hw/virtio/virtio-net.h" /* VIRTIO_NET_RSS_MAX_TABLE_LEN */
145
+
146
+#include "ebpf/ebpf_rss.h"
147
+#include "ebpf/rss.bpf.skeleton.h"
148
+#include "trace.h"
149
+
150
+void ebpf_rss_init(struct EBPFRSSContext *ctx)
151
+{
152
+ if (ctx != NULL) {
153
+ ctx->obj = NULL;
154
+ }
155
+}
156
+
157
+bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
158
+{
159
+ return ctx != NULL && ctx->obj != NULL;
160
+}
161
+
162
+bool ebpf_rss_load(struct EBPFRSSContext *ctx)
163
+{
164
+ struct rss_bpf *rss_bpf_ctx;
165
+
166
+ if (ctx == NULL) {
167
+ return false;
168
+ }
169
+
170
+ rss_bpf_ctx = rss_bpf__open();
171
+ if (rss_bpf_ctx == NULL) {
172
+ trace_ebpf_error("eBPF RSS", "can not open eBPF RSS object");
173
+ goto error;
174
+ }
175
+
176
+ bpf_program__set_socket_filter(rss_bpf_ctx->progs.tun_rss_steering_prog);
177
+
178
+ if (rss_bpf__load(rss_bpf_ctx)) {
179
+ trace_ebpf_error("eBPF RSS", "can not load RSS program");
180
+ goto error;
181
+ }
182
+
183
+ ctx->obj = rss_bpf_ctx;
184
+ ctx->program_fd = bpf_program__fd(
185
+ rss_bpf_ctx->progs.tun_rss_steering_prog);
186
+ ctx->map_configuration = bpf_map__fd(
187
+ rss_bpf_ctx->maps.tap_rss_map_configurations);
188
+ ctx->map_indirections_table = bpf_map__fd(
189
+ rss_bpf_ctx->maps.tap_rss_map_indirection_table);
190
+ ctx->map_toeplitz_key = bpf_map__fd(
191
+ rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
192
+
193
+ return true;
194
+error:
195
+ rss_bpf__destroy(rss_bpf_ctx);
196
+ ctx->obj = NULL;
197
+
198
+ return false;
199
+}
200
+
201
+static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
202
+ struct EBPFRSSConfig *config)
203
+{
204
+ uint32_t map_key = 0;
205
+
206
+ if (!ebpf_rss_is_loaded(ctx)) {
207
+ return false;
208
+ }
209
+ if (bpf_map_update_elem(ctx->map_configuration,
210
+ &map_key, config, 0) < 0) {
211
+ return false;
212
+ }
213
+ return true;
214
+}
215
+
216
+static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
217
+ uint16_t *indirections_table,
218
+ size_t len)
219
+{
220
+ uint32_t i = 0;
221
+
222
+ if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
223
+ len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
224
+ return false;
225
+ }
226
+
227
+ for (; i < len; ++i) {
228
+ if (bpf_map_update_elem(ctx->map_indirections_table, &i,
229
+ indirections_table + i, 0) < 0) {
230
+ return false;
231
+ }
232
+ }
233
+ return true;
234
+}
235
+
236
+static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
237
+ uint8_t *toeplitz_key)
238
+{
239
+ uint32_t map_key = 0;
240
+
241
+ /* prepare toeplitz key */
242
+ uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
243
+
244
+ if (!ebpf_rss_is_loaded(ctx) || toeplitz_key == NULL) {
245
+ return false;
246
+ }
247
+ memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
248
+ *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
249
+
250
+ if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
251
+ 0) < 0) {
252
+ return false;
253
+ }
254
+ return true;
255
+}
256
+
257
+bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
258
+ uint16_t *indirections_table, uint8_t *toeplitz_key)
259
+{
260
+ if (!ebpf_rss_is_loaded(ctx) || config == NULL ||
261
+ indirections_table == NULL || toeplitz_key == NULL) {
262
+ return false;
263
+ }
264
+
265
+ if (!ebpf_rss_set_config(ctx, config)) {
266
+ return false;
267
+ }
268
+
269
+ if (!ebpf_rss_set_indirections_table(ctx, indirections_table,
270
+ config->indirections_len)) {
271
+ return false;
272
+ }
273
+
274
+ if (!ebpf_rss_set_toepliz_key(ctx, toeplitz_key)) {
275
+ return false;
276
+ }
277
+
278
+ return true;
279
+}
280
+
281
+void ebpf_rss_unload(struct EBPFRSSContext *ctx)
282
+{
283
+ if (!ebpf_rss_is_loaded(ctx)) {
284
+ return;
285
+ }
286
+
287
+ rss_bpf__destroy(ctx->obj);
288
+ ctx->obj = NULL;
289
+}
290
diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
291
new file mode 100644
292
index XXXXXXX..XXXXXXX
293
--- /dev/null
294
+++ b/ebpf/ebpf_rss.h
295
@@ -XXX,XX +XXX,XX @@
296
+/*
297
+ * eBPF RSS header
298
+ *
299
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
300
+ *
301
+ * Authors:
302
+ * Andrew Melnychenko <andrew@daynix.com>
303
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
304
+ *
305
+ * This work is licensed under the terms of the GNU GPL, version 2. See
306
+ * the COPYING file in the top-level directory.
307
+ */
308
+
309
+#ifndef QEMU_EBPF_RSS_H
310
+#define QEMU_EBPF_RSS_H
311
+
312
+struct EBPFRSSContext {
313
+ void *obj;
314
+ int program_fd;
315
+ int map_configuration;
316
+ int map_toeplitz_key;
317
+ int map_indirections_table;
318
+};
319
+
320
+struct EBPFRSSConfig {
321
+ uint8_t redirect;
322
+ uint8_t populate_hash;
323
+ uint32_t hash_types;
324
+ uint16_t indirections_len;
325
+ uint16_t default_queue;
326
+} __attribute__((packed));
327
+
328
+void ebpf_rss_init(struct EBPFRSSContext *ctx);
329
+
330
+bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx);
331
+
332
+bool ebpf_rss_load(struct EBPFRSSContext *ctx);
333
+
334
+bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
335
+ uint16_t *indirections_table, uint8_t *toeplitz_key);
336
+
337
+void ebpf_rss_unload(struct EBPFRSSContext *ctx);
338
+
339
+#endif /* QEMU_EBPF_RSS_H */
340
diff --git a/ebpf/meson.build b/ebpf/meson.build
341
new file mode 100644
342
index XXXXXXX..XXXXXXX
343
--- /dev/null
344
+++ b/ebpf/meson.build
345
@@ -0,0 +1 @@
346
+common_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: files('ebpf_rss-stub.c'))
347
diff --git a/ebpf/rss.bpf.skeleton.h b/ebpf/rss.bpf.skeleton.h
348
new file mode 100644
349
index XXXXXXX..XXXXXXX
350
--- /dev/null
351
+++ b/ebpf/rss.bpf.skeleton.h
352
@@ -XXX,XX +XXX,XX @@
353
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
354
+
355
+/* THIS FILE IS AUTOGENERATED! */
356
+#ifndef __RSS_BPF_SKEL_H__
357
+#define __RSS_BPF_SKEL_H__
358
+
359
+#include <stdlib.h>
360
+#include <bpf/libbpf.h>
361
+
362
+struct rss_bpf {
363
+    struct bpf_object_skeleton *skeleton;
364
+    struct bpf_object *obj;
365
+    struct {
366
+        struct bpf_map *tap_rss_map_configurations;
367
+        struct bpf_map *tap_rss_map_indirection_table;
368
+        struct bpf_map *tap_rss_map_toeplitz_key;
369
+    } maps;
370
+    struct {
371
+        struct bpf_program *tun_rss_steering_prog;
372
+    } progs;
373
+    struct {
374
+        struct bpf_link *tun_rss_steering_prog;
375
+    } links;
376
+};
377
+
378
+static void
379
+rss_bpf__destroy(struct rss_bpf *obj)
380
+{
381
+    if (!obj)
382
+        return;
383
+    if (obj->skeleton)
384
+        bpf_object__destroy_skeleton(obj->skeleton);
385
+    free(obj);
386
+}
387
+
388
+static inline int
389
+rss_bpf__create_skeleton(struct rss_bpf *obj);
390
+
391
+static inline struct rss_bpf *
392
+rss_bpf__open_opts(const struct bpf_object_open_opts *opts)
393
+{
394
+    struct rss_bpf *obj;
395
+
396
+    obj = (struct rss_bpf *)calloc(1, sizeof(*obj));
397
+    if (!obj)
398
+        return NULL;
399
+    if (rss_bpf__create_skeleton(obj))
400
+        goto err;
401
+    if (bpf_object__open_skeleton(obj->skeleton, opts))
402
+        goto err;
403
+
404
+    return obj;
405
+err:
406
+    rss_bpf__destroy(obj);
407
+    return NULL;
408
+}
409
+
410
+static inline struct rss_bpf *
411
+rss_bpf__open(void)
412
+{
413
+    return rss_bpf__open_opts(NULL);
414
+}
415
+
416
+static inline int
417
+rss_bpf__load(struct rss_bpf *obj)
418
+{
419
+    return bpf_object__load_skeleton(obj->skeleton);
420
+}
421
+
422
+static inline struct rss_bpf *
423
+rss_bpf__open_and_load(void)
424
+{
425
+    struct rss_bpf *obj;
426
+
427
+    obj = rss_bpf__open();
428
+    if (!obj)
429
+        return NULL;
430
+    if (rss_bpf__load(obj)) {
431
+        rss_bpf__destroy(obj);
432
+        return NULL;
433
+    }
434
+    return obj;
435
+}
436
+
437
+static inline int
438
+rss_bpf__attach(struct rss_bpf *obj)
439
+{
440
+    return bpf_object__attach_skeleton(obj->skeleton);
441
+}
442
+
443
+static inline void
444
+rss_bpf__detach(struct rss_bpf *obj)
445
+{
446
+    return bpf_object__detach_skeleton(obj->skeleton);
447
+}
448
+
449
+static inline int
450
+rss_bpf__create_skeleton(struct rss_bpf *obj)
451
+{
452
+    struct bpf_object_skeleton *s;
453
+
454
+    s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));
455
+    if (!s)
456
+        return -1;
457
+    obj->skeleton = s;
458
+
459
+    s->sz = sizeof(*s);
460
+    s->name = "rss_bpf";
461
+    s->obj = &obj->obj;
462
+
463
+    /* maps */
464
+    s->map_cnt = 3;
465
+    s->map_skel_sz = sizeof(*s->maps);
466
+    s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);
467
+    if (!s->maps)
468
+        goto err;
469
+
470
+    s->maps[0].name = "tap_rss_map_configurations";
471
+    s->maps[0].map = &obj->maps.tap_rss_map_configurations;
472
+
473
+    s->maps[1].name = "tap_rss_map_indirection_table";
474
+    s->maps[1].map = &obj->maps.tap_rss_map_indirection_table;
475
+
476
+    s->maps[2].name = "tap_rss_map_toeplitz_key";
477
+    s->maps[2].map = &obj->maps.tap_rss_map_toeplitz_key;
478
+
479
+    /* programs */
480
+    s->prog_cnt = 1;
481
+    s->prog_skel_sz = sizeof(*s->progs);
482
+    s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);
483
+    if (!s->progs)
484
+        goto err;
485
+
486
+    s->progs[0].name = "tun_rss_steering_prog";
487
+    s->progs[0].prog = &obj->progs.tun_rss_steering_prog;
488
+    s->progs[0].link = &obj->links.tun_rss_steering_prog;
489
+
490
+    s->data_sz = 8088;
491
+    s->data = (void *)"\
492
+\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\
493
+\0\0\0\0\0\0\0\0\0\0\0\x18\x1d\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0a\0\
494
+\x01\0\xbf\x18\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\x4c\xff\0\0\0\0\xbf\xa7\
495
+\0\0\0\0\0\0\x07\x07\0\0\x4c\xff\xff\xff\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
496
+\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x06\0\0\0\0\0\0\x18\x01\0\0\0\0\0\
497
+\0\0\0\0\0\0\0\0\0\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x07\0\0\0\0\0\0\
498
+\x18\0\0\0\xff\xff\xff\xff\0\0\0\0\0\0\0\0\x15\x06\x66\x02\0\0\0\0\xbf\x79\0\0\
499
+\0\0\0\0\x15\x09\x64\x02\0\0\0\0\x71\x61\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\
500
+\0\x5d\x02\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xc0\xff\0\0\0\0\x7b\x1a\xb8\xff\
501
+\0\0\0\0\x7b\x1a\xb0\xff\0\0\0\0\x7b\x1a\xa8\xff\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\
502
+\0\x63\x1a\x98\xff\0\0\0\0\x7b\x1a\x90\xff\0\0\0\0\x7b\x1a\x88\xff\0\0\0\0\x7b\
503
+\x1a\x80\xff\0\0\0\0\x7b\x1a\x78\xff\0\0\0\0\x7b\x1a\x70\xff\0\0\0\0\x7b\x1a\
504
+\x68\xff\0\0\0\0\x7b\x1a\x60\xff\0\0\0\0\x7b\x1a\x58\xff\0\0\0\0\x7b\x1a\x50\
505
+\xff\0\0\0\0\x15\x08\x4c\x02\0\0\0\0\x6b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\
506
+\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x02\0\0\x0c\0\0\0\xb7\
507
+\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\
508
+\x77\0\0\0\x20\0\0\0\x55\0\x11\0\0\0\0\0\xb7\x02\0\0\x10\0\0\0\x69\xa1\xd0\xff\
509
+\0\0\0\0\xbf\x13\0\0\0\0\0\0\xdc\x03\0\0\x10\0\0\0\x15\x03\x02\0\0\x81\0\0\x55\
510
+\x03\x0c\0\xa8\x88\0\0\xb7\x02\0\0\x14\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\
511
+\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\
512
+\x85\0\0\0\x44\0\0\0\x69\xa1\xd0\xff\0\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\
513
+\0\0\0\x15\0\x01\0\0\0\0\0\x05\0\x2f\x02\0\0\0\0\x15\x01\x2e\x02\0\0\0\0\x7b\
514
+\x9a\x30\xff\0\0\0\0\x15\x01\x57\0\x86\xdd\0\0\x55\x01\x3b\0\x08\0\0\0\x7b\x7a\
515
+\x20\xff\0\0\0\0\xb7\x07\0\0\x01\0\0\0\x73\x7a\x50\xff\0\0\0\0\xb7\x01\0\0\0\0\
516
+\0\0\x63\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\
517
+\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x02\0\
518
+\0\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\
519
+\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x1a\x02\0\0\0\0\x69\xa1\xd6\xff\0\0\
520
+\0\0\x55\x01\x01\0\0\0\0\0\xb7\x07\0\0\0\0\0\0\x61\xa1\xdc\xff\0\0\0\0\x63\x1a\
521
+\x5c\xff\0\0\0\0\x61\xa1\xe0\xff\0\0\0\0\x63\x1a\x60\xff\0\0\0\0\x73\x7a\x56\
522
+\xff\0\0\0\0\x71\xa9\xd9\xff\0\0\0\0\x71\xa1\xd0\xff\0\0\0\0\x67\x01\0\0\x02\0\
523
+\0\0\x57\x01\0\0\x3c\0\0\0\x7b\x1a\x40\xff\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\xbf\
524
+\x91\0\0\0\0\0\0\x57\x01\0\0\xff\0\0\0\x15\x01\x19\0\0\0\0\0\x71\xa1\x56\xff\0\
525
+\0\0\0\x55\x01\x17\0\0\0\0\0\x57\x09\0\0\xff\0\0\0\x15\x09\x7a\x01\x11\0\0\0\
526
+\x55\x09\x14\0\x06\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x53\xff\0\0\0\0\xb7\x01\
527
+\0\0\0\0\0\0\x63\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\
528
+\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\
529
+\xa2\x40\xff\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\
530
+\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\xf4\x01\0\0\0\0\x69\xa1\
531
+\xd0\xff\0\0\0\0\x6b\x1a\x58\xff\0\0\0\0\x69\xa1\xd2\xff\0\0\0\0\x6b\x1a\x5a\
532
+\xff\0\0\0\0\x71\xa1\x50\xff\0\0\0\0\x15\x01\xd4\0\0\0\0\0\x71\x62\x03\0\0\0\0\
533
+\0\x67\x02\0\0\x08\0\0\0\x71\x61\x02\0\0\0\0\0\x4f\x12\0\0\0\0\0\0\x71\x63\x04\
534
+\0\0\0\0\0\x71\x61\x05\0\0\0\0\0\x67\x01\0\0\x08\0\0\0\x4f\x31\0\0\0\0\0\0\x67\
535
+\x01\0\0\x10\0\0\0\x4f\x21\0\0\0\0\0\0\x71\xa2\x53\xff\0\0\0\0\x79\xa0\x30\xff\
536
+\0\0\0\0\x15\x02\x06\x01\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x02\0\0\0\x15\
537
+\x02\x03\x01\0\0\0\0\x61\xa1\x5c\xff\0\0\0\0\x63\x1a\xa0\xff\0\0\0\0\x61\xa1\
538
+\x60\xff\0\0\0\0\x63\x1a\xa4\xff\0\0\0\0\x69\xa1\x58\xff\0\0\0\0\x6b\x1a\xa8\
539
+\xff\0\0\0\0\x69\xa1\x5a\xff\0\0\0\0\x6b\x1a\xaa\xff\0\0\0\0\x05\0\x65\x01\0\0\
540
+\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x51\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\
541
+\xf0\xff\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\
542
+\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\
543
+\xff\xff\xb7\x01\0\0\x28\0\0\0\x7b\x1a\x40\xff\0\0\0\0\xbf\x81\0\0\0\0\0\0\xb7\
544
+\x02\0\0\0\0\0\0\xb7\x04\0\0\x28\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\
545
+\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x10\x01\0\0\0\0\x79\xa1\xe0\
546
+\xff\0\0\0\0\x63\x1a\x64\xff\0\0\0\0\x77\x01\0\0\x20\0\0\0\x63\x1a\x68\xff\0\0\
547
+\0\0\x79\xa1\xd8\xff\0\0\0\0\x63\x1a\x5c\xff\0\0\0\0\x77\x01\0\0\x20\0\0\0\x63\
548
+\x1a\x60\xff\0\0\0\0\x79\xa1\xe8\xff\0\0\0\0\x63\x1a\x6c\xff\0\0\0\0\x77\x01\0\
549
+\0\x20\0\0\0\x63\x1a\x70\xff\0\0\0\0\x79\xa1\xf0\xff\0\0\0\0\x63\x1a\x74\xff\0\
550
+\0\0\0\x77\x01\0\0\x20\0\0\0\x63\x1a\x78\xff\0\0\0\0\x71\xa9\xd6\xff\0\0\0\0\
551
+\x25\x09\xff\0\x3c\0\0\0\xb7\x01\0\0\x01\0\0\0\x6f\x91\0\0\0\0\0\0\x18\x02\0\0\
552
+\x01\0\0\0\0\0\0\0\0\x18\0\x1c\x5f\x21\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\0\
553
+\xf8\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x6b\x1a\xfe\xff\0\0\0\0\xb7\x01\0\0\x28\0\0\
554
+\0\x7b\x1a\x40\xff\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x8c\xff\xff\xff\x7b\
555
+\x1a\x18\xff\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x7c\xff\xff\xff\x7b\x1a\
556
+\x10\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\x28\xff\0\0\0\0\x7b\x7a\x20\xff\0\
557
+\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xfe\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\
558
+\xa2\x40\xff\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\
559
+\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x15\0\x01\0\0\0\0\0\x05\0\x90\
560
+\x01\0\0\0\0\xbf\x91\0\0\0\0\0\0\x15\x01\x23\0\x3c\0\0\0\x15\x01\x59\0\x2c\0\0\
561
+\0\x55\x01\x5a\0\x2b\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xf8\xff\0\0\0\0\xbf\xa3\
562
+\0\0\0\0\0\0\x07\x03\0\0\xf8\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\xa2\x40\xff\0\
563
+\0\0\0\xb7\x04\0\0\x04\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\
564
+\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\x03\x01\0\0\0\
565
+\0\x71\xa1\xfa\xff\0\0\0\0\x55\x01\x4b\0\x02\0\0\0\x71\xa1\xf9\xff\0\0\0\0\x55\
566
+\x01\x49\0\x02\0\0\0\x71\xa1\xfb\xff\0\0\0\0\x55\x01\x47\0\x01\0\0\0\x79\xa2\
567
+\x40\xff\0\0\0\0\x07\x02\0\0\x08\0\0\0\xbf\x81\0\0\0\0\0\0\x79\xa3\x18\xff\0\0\
568
+\0\0\xb7\x04\0\0\x10\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\0\
569
+\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\xf2\0\0\0\0\0\
570
+\xb7\x01\0\0\x01\0\0\0\x73\x1a\x55\xff\0\0\0\0\x05\0\x39\0\0\0\0\0\xb7\x01\0\0\
571
+\0\0\0\0\x6b\x1a\xf8\xff\0\0\0\0\xb7\x09\0\0\x02\0\0\0\xb7\x07\0\0\x1e\0\0\0\
572
+\x05\0\x0e\0\0\0\0\0\x79\xa2\x38\xff\0\0\0\0\x0f\x29\0\0\0\0\0\0\xbf\x92\0\0\0\
573
+\0\0\0\x07\x02\0\0\x01\0\0\0\x71\xa3\xff\xff\0\0\0\0\x67\x03\0\0\x03\0\0\0\x2d\
574
+\x23\x02\0\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\x05\0\x2b\0\0\0\0\0\x07\x07\0\0\xff\
575
+\xff\xff\xff\xbf\x72\0\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x77\x02\0\0\x20\0\0\0\
576
+\x15\x02\xf9\xff\0\0\0\0\x7b\x9a\x38\xff\0\0\0\0\x79\xa1\x40\xff\0\0\0\0\x0f\
577
+\x19\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xf8\xff\xff\xff\xbf\x81\0\0\0\
578
+\0\0\0\xbf\x92\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\
579
+\0\x44\0\0\0\xbf\x01\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\
580
+\x55\x01\x94\0\0\0\0\0\x71\xa2\xf8\xff\0\0\0\0\x55\x02\x0f\0\xc9\0\0\0\x07\x09\
581
+\0\0\x02\0\0\0\xbf\x81\0\0\0\0\0\0\xbf\x92\0\0\0\0\0\0\x79\xa3\x10\xff\0\0\0\0\
582
+\xb7\x04\0\0\x10\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\0\0\0\
583
+\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\x87\0\0\0\0\0\xb7\
584
+\x01\0\0\x01\0\0\0\x73\x1a\x54\xff\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\x05\0\x07\0\
585
+\0\0\0\0\xb7\x09\0\0\x01\0\0\0\x15\x02\xd1\xff\0\0\0\0\x71\xa9\xf9\xff\0\0\0\0\
586
+\x07\x09\0\0\x02\0\0\0\x05\0\xce\xff\0\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x56\
587
+\xff\0\0\0\0\x71\xa1\xff\xff\0\0\0\0\x67\x01\0\0\x03\0\0\0\x79\xa2\x40\xff\0\0\
588
+\0\0\x0f\x12\0\0\0\0\0\0\x07\x02\0\0\x08\0\0\0\x7b\x2a\x40\xff\0\0\0\0\x71\xa9\
589
+\xfe\xff\0\0\0\0\x25\x09\x0e\0\x3c\0\0\0\xb7\x01\0\0\x01\0\0\0\x6f\x91\0\0\0\0\
590
+\0\0\x18\x02\0\0\x01\0\0\0\0\0\0\0\0\x18\0\x1c\x5f\x21\0\0\0\0\0\0\x55\x01\x01\
591
+\0\0\0\0\0\x05\0\x07\0\0\0\0\0\x79\xa1\x28\xff\0\0\0\0\x07\x01\0\0\x01\0\0\0\
592
+\x7b\x1a\x28\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\
593
+\x82\xff\x0b\0\0\0\x05\0\x10\xff\0\0\0\0\x15\x09\xf8\xff\x87\0\0\0\x05\0\xfd\
594
+\xff\0\0\0\0\x71\xa1\x51\xff\0\0\0\0\x79\xa0\x30\xff\0\0\0\0\x15\x01\x17\x01\0\
595
+\0\0\0\x71\x62\x03\0\0\0\0\0\x67\x02\0\0\x08\0\0\0\x71\x61\x02\0\0\0\0\0\x4f\
596
+\x12\0\0\0\0\0\0\x71\x63\x04\0\0\0\0\0\x71\x61\x05\0\0\0\0\0\x67\x01\0\0\x08\0\
597
+\0\0\x4f\x31\0\0\0\0\0\0\x67\x01\0\0\x10\0\0\0\x4f\x21\0\0\0\0\0\0\x71\xa2\x53\
598
+\xff\0\0\0\0\x15\x02\x3d\0\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x10\0\0\0\
599
+\x15\x02\x3a\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x5c\xff\xff\xff\x71\xa4\
600
+\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\
601
+\x07\x03\0\0\x7c\xff\xff\xff\x67\x01\0\0\x38\0\0\0\xc7\x01\0\0\x38\0\0\0\x65\
602
+\x01\x01\0\xff\xff\xff\xff\xbf\x32\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\
603
+\x6c\xff\xff\xff\x71\xa5\x55\xff\0\0\0\0\xbf\x34\0\0\0\0\0\0\x15\x05\x02\0\0\0\
604
+\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\x8c\xff\xff\xff\x65\x01\x01\0\xff\xff\xff\
605
+\xff\xbf\x43\0\0\0\0\0\0\x61\x21\x04\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\x24\0\
606
+\0\0\0\0\0\x4f\x41\0\0\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\0\x61\x21\x08\0\0\0\0\0\
607
+\x61\x22\x0c\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x12\0\0\0\0\0\0\x7b\x2a\xa8\
608
+\xff\0\0\0\0\x61\x31\0\0\0\0\0\0\x61\x32\x04\0\0\0\0\0\x61\x34\x08\0\0\0\0\0\
609
+\x61\x33\x0c\0\0\0\0\0\x69\xa5\x5a\xff\0\0\0\0\x6b\x5a\xc2\xff\0\0\0\0\x69\xa5\
610
+\x58\xff\0\0\0\0\x6b\x5a\xc0\xff\0\0\0\0\x67\x03\0\0\x20\0\0\0\x4f\x43\0\0\0\0\
611
+\0\0\x7b\x3a\xb8\xff\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x12\0\0\0\0\0\0\x7b\x2a\
612
+\xb0\xff\0\0\0\0\x05\0\x6b\0\0\0\0\0\x71\xa2\x52\xff\0\0\0\0\x15\x02\x04\0\0\0\
613
+\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x04\0\0\0\x15\x02\x01\0\0\0\0\0\x05\0\xf7\
614
+\xfe\0\0\0\0\x57\x01\0\0\x01\0\0\0\x15\x01\xd3\0\0\0\0\0\x61\xa1\x5c\xff\0\0\0\
615
+\0\x63\x1a\xa0\xff\0\0\0\0\x61\xa1\x60\xff\0\0\0\0\x63\x1a\xa4\xff\0\0\0\0\x05\
616
+\0\x5e\0\0\0\0\0\x71\xa2\x52\xff\0\0\0\0\x15\x02\x1e\0\0\0\0\0\xbf\x12\0\0\0\0\
617
+\0\0\x57\x02\0\0\x20\0\0\0\x15\x02\x1b\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\
618
+\0\x5c\xff\xff\xff\x71\xa4\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\
619
+\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\x7c\xff\xff\xff\x57\x01\0\0\0\x01\0\0\
620
+\x15\x01\x01\0\0\0\0\0\xbf\x32\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\x6c\
621
+\xff\xff\xff\x71\xa5\x55\xff\0\0\0\0\xbf\x34\0\0\0\0\0\0\x15\x05\x02\0\0\0\0\0\
622
+\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\x8c\xff\xff\xff\x15\x01\xc3\xff\0\0\0\0\x05\0\
623
+\xc1\xff\0\0\0\0\xb7\x09\0\0\x3c\0\0\0\x79\xa7\x20\xff\0\0\0\0\x67\0\0\0\x20\0\
624
+\0\0\x77\0\0\0\x20\0\0\0\x15\0\xa5\xfe\0\0\0\0\x05\0\xb0\0\0\0\0\0\x15\x09\x07\
625
+\xff\x87\0\0\0\x05\0\xa2\xfe\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x08\0\0\0\
626
+\x15\x02\xab\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x5c\xff\xff\xff\x71\xa4\
627
+\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\
628
+\x07\x03\0\0\x7c\xff\xff\xff\x57\x01\0\0\x40\0\0\0\x15\x01\x01\0\0\0\0\0\xbf\
629
+\x32\0\0\0\0\0\0\x61\x23\x04\0\0\0\0\0\x67\x03\0\0\x20\0\0\0\x61\x24\0\0\0\0\0\
630
+\0\x4f\x43\0\0\0\0\0\0\x7b\x3a\xa0\xff\0\0\0\0\x61\x23\x08\0\0\0\0\0\x61\x22\
631
+\x0c\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x32\0\0\0\0\0\0\x7b\x2a\xa8\xff\0\0\0\
632
+\0\x15\x01\x1c\0\0\0\0\0\x71\xa1\x55\xff\0\0\0\0\x15\x01\x1a\0\0\0\0\0\x61\xa1\
633
+\x98\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\x94\xff\0\0\0\0\x4f\x21\0\0\0\0\
634
+\0\0\x7b\x1a\xb8\xff\0\0\0\0\x61\xa1\x90\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\
635
+\xa2\x8c\xff\0\0\0\0\x05\0\x19\0\0\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x52\xff\
636
+\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\
637
+\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\xa2\x40\xff\0\0\0\0\xb7\x04\0\
638
+\0\x08\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\x77\
639
+\0\0\0\x20\0\0\0\x55\0\x7d\0\0\0\0\0\x05\0\x88\xfe\0\0\0\0\xb7\x09\0\0\x2b\0\0\
640
+\0\x05\0\xc6\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\
641
+\x74\xff\0\0\0\0\x4f\x21\0\0\0\0\0\0\x7b\x1a\xb8\xff\0\0\0\0\x61\xa1\x70\xff\0\
642
+\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\x6c\xff\0\0\0\0\x4f\x21\0\0\0\0\0\0\x7b\
643
+\x1a\xb0\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x07\x07\0\0\x04\0\0\0\x61\x03\0\0\0\0\
644
+\0\0\xb7\x05\0\0\0\0\0\0\x05\0\x4e\0\0\0\0\0\xaf\x52\0\0\0\0\0\0\xbf\x75\0\0\0\
645
+\0\0\0\x0f\x15\0\0\0\0\0\0\x71\x55\0\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\
646
+\0\0\0\0\0\x77\0\0\0\x07\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\
647
+\0\x39\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\
648
+\x50\0\0\0\0\0\0\x77\0\0\0\x06\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\
649
+\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3a\0\0\0\xc7\0\0\0\x3f\0\0\
650
+\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\0\0\
651
+\0\0\0\x77\0\0\0\x05\0\0\0\x57\0\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\
652
+\0\0\0\0\x67\0\0\0\x3b\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\
653
+\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\0\0\0\0\0\x77\0\0\0\x04\0\0\0\x57\0\
654
+\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3c\0\0\0\xc7\
655
+\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\x50\0\0\0\0\0\0\
656
+\x77\0\0\0\x03\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x03\0\0\0\0\
657
+\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3d\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\
658
+\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\x50\0\0\0\0\0\0\x77\0\0\0\x02\0\0\0\x57\0\0\0\
659
+\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\
660
+\0\0\x3e\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\
661
+\x50\0\0\0\0\0\0\x77\0\0\0\x01\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\
662
+\x4f\x03\0\0\0\0\0\0\x57\x04\0\0\x01\0\0\0\x87\x04\0\0\0\0\0\0\x5f\x34\0\0\0\0\
663
+\0\0\xaf\x42\0\0\0\0\0\0\x57\x05\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x53\0\
664
+\0\0\0\0\0\x07\x01\0\0\x01\0\0\0\xbf\x25\0\0\0\0\0\0\x15\x01\x0b\0\x24\0\0\0\
665
+\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\xa0\xff\xff\xff\x0f\x12\0\0\0\0\0\0\x71\x24\0\
666
+\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x38\0\0\0\xc7\0\0\0\x38\0\0\0\xb7\x02\
667
+\0\0\0\0\0\0\x65\0\xa9\xff\xff\xff\xff\xff\xbf\x32\0\0\0\0\0\0\x05\0\xa7\xff\0\
668
+\0\0\0\xbf\x21\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x15\x01\
669
+\x0e\0\0\0\0\0\x71\x63\x06\0\0\0\0\0\x71\x64\x07\0\0\0\0\0\x67\x04\0\0\x08\0\0\
670
+\0\x4f\x34\0\0\0\0\0\0\x3f\x41\0\0\0\0\0\0\x2f\x41\0\0\0\0\0\0\x1f\x12\0\0\0\0\
671
+\0\0\x63\x2a\x50\xff\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x50\xff\xff\xff\
672
+\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\x55\0\x05\0\0\0\0\0\
673
+\x71\x61\x08\0\0\0\0\0\x71\x60\x09\0\0\0\0\0\x67\0\0\0\x08\0\0\0\x4f\x10\0\0\0\
674
+\0\0\0\x95\0\0\0\0\0\0\0\x69\0\0\0\0\0\0\0\x05\0\xfd\xff\0\0\0\0\x02\0\0\0\x04\
675
+\0\0\0\x0a\0\0\0\x01\0\0\0\0\0\0\0\x02\0\0\0\x04\0\0\0\x28\0\0\0\x01\0\0\0\0\0\
676
+\0\0\x02\0\0\0\x04\0\0\0\x02\0\0\0\x80\0\0\0\0\0\0\0\x47\x50\x4c\x20\x76\x32\0\
677
+\0\0\0\0\0\x10\0\0\0\0\0\0\0\x01\x7a\x52\0\x08\x7c\x0b\x01\x0c\0\0\0\x18\0\0\0\
678
+\x18\0\0\0\0\0\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
679
+\0\0\0\0\0\0\0\0\0\0\0\0\xa0\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
680
+\0\x60\x02\0\0\0\0\x03\0\x20\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3f\x02\0\0\0\0\
681
+\x03\0\xd0\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xed\x01\0\0\0\0\x03\0\x10\x10\0\0\0\
682
+\0\0\0\0\0\0\0\0\0\0\0\xd4\x01\0\0\0\0\x03\0\x20\x10\0\0\0\0\0\0\0\0\0\0\0\0\0\
683
+\0\xa3\x01\0\0\0\0\x03\0\xb8\x12\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x01\0\0\0\0\
684
+\x03\0\x48\x10\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2a\x01\0\0\0\0\x03\0\x10\x13\0\0\0\
685
+\0\0\0\0\0\0\0\0\0\0\0\xe1\0\0\0\0\0\x03\0\xa0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
686
+\x2e\x02\0\0\0\0\x03\0\x28\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x68\x02\0\0\0\0\x03\
687
+\0\xc0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x36\x02\0\0\0\0\x03\0\xc8\x13\0\0\0\0\0\
688
+\0\0\0\0\0\0\0\0\0\x22\x01\0\0\0\0\x03\0\xe8\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
689
+\x02\x01\0\0\0\0\x03\0\x40\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd9\0\0\0\0\0\x03\0\
690
+\xf8\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x26\x02\0\0\0\0\x03\0\x20\x0e\0\0\0\0\0\0\
691
+\0\0\0\0\0\0\0\0\xcc\x01\0\0\0\0\x03\0\x60\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9b\
692
+\x01\0\0\0\0\x03\0\xc8\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x5b\x01\0\0\0\0\x03\0\
693
+\x20\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x7c\x01\0\0\0\0\x03\0\x48\x08\0\0\0\0\0\0\
694
+\0\0\0\0\0\0\0\0\x53\x01\0\0\0\0\x03\0\xb8\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1a\
695
+\x01\0\0\0\0\x03\0\xe0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x84\x01\0\0\0\0\x03\0\
696
+\xb8\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1e\x02\0\0\0\0\x03\0\xd8\x09\0\0\0\0\0\0\0\
697
+\0\0\0\0\0\0\0\xc4\x01\0\0\0\0\x03\0\x70\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x93\
698
+\x01\0\0\0\0\x03\0\xa8\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x74\x01\0\0\0\0\x03\0\
699
+\xf0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x4b\x01\0\0\0\0\x03\0\0\x0a\0\0\0\0\0\0\0\
700
+\0\0\0\0\0\0\0\x12\x01\0\0\0\0\x03\0\x10\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xfa\0\
701
+\0\0\0\0\x03\0\xc0\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x58\x02\0\0\0\0\x03\0\x88\
702
+\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x16\x02\0\0\0\0\x03\0\xb8\x0a\0\0\0\0\0\0\0\0\
703
+\0\0\0\0\0\0\xe5\x01\0\0\0\0\x03\0\xc0\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xbc\x01\
704
+\0\0\0\0\x03\0\0\x0e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x8b\x01\0\0\0\0\x03\0\x18\x0e\
705
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd1\0\0\0\0\0\x03\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\
706
+\0\0\x50\x02\0\0\0\0\x03\0\x20\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0e\x02\0\0\0\0\
707
+\x03\0\x48\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x6c\x01\0\0\0\0\x03\0\xb0\x04\0\0\0\
708
+\0\0\0\0\0\0\0\0\0\0\0\x43\x01\0\0\0\0\x03\0\xc8\x0c\0\0\0\0\0\0\0\0\0\0\0\0\0\
709
+\0\xc9\0\0\0\0\0\x03\0\xf8\x0c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x06\x02\0\0\0\0\x03\
710
+\0\xd0\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3b\x01\0\0\0\0\x03\0\x98\x0b\0\0\0\0\0\
711
+\0\0\0\0\0\0\0\0\0\xf2\0\0\0\0\0\x03\0\xb8\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\
712
+\x02\0\0\0\0\x03\0\xf0\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xfe\x01\0\0\0\0\x03\0\
713
+\xf8\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xdd\x01\0\0\0\0\x03\0\0\x0c\0\0\0\0\0\0\0\
714
+\0\0\0\0\0\0\0\xb4\x01\0\0\0\0\x03\0\x30\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0a\
715
+\x01\0\0\0\0\x03\0\x90\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc1\0\0\0\0\0\x03\0\xa8\
716
+\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xba\0\0\0\0\0\x03\0\xd0\x01\0\0\0\0\0\0\0\0\0\
717
+\0\0\0\0\0\xf6\x01\0\0\0\0\x03\0\xe0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xac\x01\0\
718
+\0\0\0\x03\0\x30\x0e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x33\x01\0\0\0\0\x03\0\x80\x0e\
719
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xea\0\0\0\0\0\x03\0\x98\x0e\0\0\0\0\0\0\0\0\0\0\0\
720
+\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x6b\0\0\0\x11\0\x06\
721
+\0\0\0\0\0\0\0\0\0\x07\0\0\0\0\0\0\0\x25\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\x14\
722
+\0\0\0\0\0\0\0\x82\0\0\0\x11\0\x05\0\x28\0\0\0\0\0\0\0\x14\0\0\0\0\0\0\0\x01\0\
723
+\0\0\x11\0\x05\0\x14\0\0\0\0\0\0\0\x14\0\0\0\0\0\0\0\x40\0\0\0\x12\0\x03\0\0\0\
724
+\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\x28\0\0\0\0\0\0\0\x01\0\0\0\x3a\0\0\0\x50\0\0\
725
+\0\0\0\0\0\x01\0\0\0\x3c\0\0\0\x80\x13\0\0\0\0\0\0\x01\0\0\0\x3b\0\0\0\x1c\0\0\
726
+\0\0\0\0\0\x01\0\0\0\x38\0\0\0\0\x74\x61\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\
727
+\x5f\x74\x6f\x65\x70\x6c\x69\x74\x7a\x5f\x6b\x65\x79\0\x2e\x74\x65\x78\x74\0\
728
+\x6d\x61\x70\x73\0\x74\x61\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\x5f\x63\x6f\x6e\
729
+\x66\x69\x67\x75\x72\x61\x74\x69\x6f\x6e\x73\0\x74\x75\x6e\x5f\x72\x73\x73\x5f\
730
+\x73\x74\x65\x65\x72\x69\x6e\x67\x5f\x70\x72\x6f\x67\0\x2e\x72\x65\x6c\x74\x75\
731
+\x6e\x5f\x72\x73\x73\x5f\x73\x74\x65\x65\x72\x69\x6e\x67\0\x5f\x6c\x69\x63\x65\
732
+\x6e\x73\x65\0\x2e\x72\x65\x6c\x2e\x65\x68\x5f\x66\x72\x61\x6d\x65\0\x74\x61\
733
+\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\x5f\x69\x6e\x64\x69\x72\x65\x63\x74\x69\
734
+\x6f\x6e\x5f\x74\x61\x62\x6c\x65\0\x72\x73\x73\x2e\x62\x70\x66\x2e\x63\0\x2e\
735
+\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x4c\x42\x42\x30\x5f\
736
+\x39\0\x4c\x42\x42\x30\x5f\x38\x39\0\x4c\x42\x42\x30\x5f\x36\x39\0\x4c\x42\x42\
737
+\x30\x5f\x35\x39\0\x4c\x42\x42\x30\x5f\x31\x39\0\x4c\x42\x42\x30\x5f\x31\x30\
738
+\x39\0\x4c\x42\x42\x30\x5f\x39\x38\0\x4c\x42\x42\x30\x5f\x37\x38\0\x4c\x42\x42\
739
+\x30\x5f\x34\x38\0\x4c\x42\x42\x30\x5f\x31\x38\0\x4c\x42\x42\x30\x5f\x38\x37\0\
740
+\x4c\x42\x42\x30\x5f\x34\x37\0\x4c\x42\x42\x30\x5f\x33\x37\0\x4c\x42\x42\x30\
741
+\x5f\x31\x37\0\x4c\x42\x42\x30\x5f\x31\x30\x37\0\x4c\x42\x42\x30\x5f\x39\x36\0\
742
+\x4c\x42\x42\x30\x5f\x37\x36\0\x4c\x42\x42\x30\x5f\x36\x36\0\x4c\x42\x42\x30\
743
+\x5f\x34\x36\0\x4c\x42\x42\x30\x5f\x33\x36\0\x4c\x42\x42\x30\x5f\x32\x36\0\x4c\
744
+\x42\x42\x30\x5f\x31\x30\x36\0\x4c\x42\x42\x30\x5f\x36\x35\0\x4c\x42\x42\x30\
745
+\x5f\x34\x35\0\x4c\x42\x42\x30\x5f\x33\x35\0\x4c\x42\x42\x30\x5f\x34\0\x4c\x42\
746
+\x42\x30\x5f\x35\x34\0\x4c\x42\x42\x30\x5f\x34\x34\0\x4c\x42\x42\x30\x5f\x32\
747
+\x34\0\x4c\x42\x42\x30\x5f\x31\x30\x34\0\x4c\x42\x42\x30\x5f\x39\x33\0\x4c\x42\
748
+\x42\x30\x5f\x38\x33\0\x4c\x42\x42\x30\x5f\x35\x33\0\x4c\x42\x42\x30\x5f\x34\
749
+\x33\0\x4c\x42\x42\x30\x5f\x32\x33\0\x4c\x42\x42\x30\x5f\x31\x30\x33\0\x4c\x42\
750
+\x42\x30\x5f\x38\x32\0\x4c\x42\x42\x30\x5f\x35\x32\0\x4c\x42\x42\x30\x5f\x31\
751
+\x30\x32\0\x4c\x42\x42\x30\x5f\x39\x31\0\x4c\x42\x42\x30\x5f\x38\x31\0\x4c\x42\
752
+\x42\x30\x5f\x37\x31\0\x4c\x42\x42\x30\x5f\x36\x31\0\x4c\x42\x42\x30\x5f\x35\
753
+\x31\0\x4c\x42\x42\x30\x5f\x34\x31\0\x4c\x42\x42\x30\x5f\x32\x31\0\x4c\x42\x42\
754
+\x30\x5f\x31\x31\0\x4c\x42\x42\x30\x5f\x31\x31\x31\0\x4c\x42\x42\x30\x5f\x31\
755
+\x30\x31\0\x4c\x42\x42\x30\x5f\x38\x30\0\x4c\x42\x42\x30\x5f\x36\x30\0\x4c\x42\
756
+\x42\x30\x5f\x35\x30\0\x4c\x42\x42\x30\x5f\x31\x30\0\x4c\x42\x42\x30\x5f\x31\
757
+\x31\x30\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
758
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\
759
+\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa0\x1a\0\0\0\0\0\0\x71\x02\0\
760
+\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1a\0\0\0\x01\0\0\
761
+\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
762
+\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x5a\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\
763
+\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\
764
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x56\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
765
+\0\x60\x1a\0\0\0\0\0\0\x30\0\0\0\0\0\0\0\x09\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\
766
+\x10\0\0\0\0\0\0\0\x20\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x18\
767
+\x14\0\0\0\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\
768
+\0\0\0\x6c\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x54\x14\0\0\0\0\0\
769
+\0\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x78\0\0\
770
+\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x60\x14\0\0\0\0\0\0\x30\0\0\0\0\
771
+\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x74\0\0\0\x09\0\0\0\0\
772
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x90\x1a\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x09\0\0\0\
773
+\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\xb2\0\0\0\x02\0\0\0\0\0\0\0\0\0\
774
+\0\0\0\0\0\0\0\0\0\0\x90\x14\0\0\0\0\0\0\xd0\x05\0\0\0\0\0\0\x01\0\0\0\x39\0\0\
775
+\0\x08\0\0\0\0\0\0\0\x18\0\0\0\0\0\0\0";
776
+
777
+    return 0;
778
+err:
779
+    bpf_object__destroy_skeleton(s);
780
+    return -1;
781
+}
782
+
783
+#endif /* __RSS_BPF_SKEL_H__ */
784
diff --git a/ebpf/trace-events b/ebpf/trace-events
785
new file mode 100644
786
index XXXXXXX..XXXXXXX
787
--- /dev/null
788
+++ b/ebpf/trace-events
789
@@ -XXX,XX +XXX,XX @@
790
+# See docs/devel/tracing.txt for syntax documentation.
791
+
792
+# ebpf-rss.c
793
+ebpf_error(const char *s1, const char *s2) "error in %s: %s"
794
diff --git a/ebpf/trace.h b/ebpf/trace.h
795
new file mode 100644
796
index XXXXXXX..XXXXXXX
797
--- /dev/null
798
+++ b/ebpf/trace.h
799
@@ -0,0 +1 @@
800
+#include "trace/trace-ebpf.h"
801
diff --git a/meson.build b/meson.build
19
index XXXXXXX..XXXXXXX 100644
802
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/net/net_rx_pkt.c
803
--- a/meson.build
21
+++ b/hw/net/net_rx_pkt.c
804
+++ b/meson.build
22
@@ -XXX,XX +XXX,XX @@ _net_rx_rss_prepare_tcp(uint8_t *rss_input,
805
@@ -XXX,XX +XXX,XX @@ if not get_option('fuse_lseek').disabled()
23
&tcphdr->th_dport, sizeof(uint16_t));
806
endif
24
}
807
endif
25
808
26
+static inline void
809
+# libbpf
27
+_net_rx_rss_prepare_udp(uint8_t *rss_input,
810
+libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
28
+ struct NetRxPkt *pkt,
811
+if libbpf.found() and not cc.links('''
29
+ size_t *bytes_written)
812
+ #include <bpf/libbpf.h>
30
+{
813
+ int main(void)
31
+ struct udp_header *udphdr = &pkt->l4hdr_info.hdr.udp;
814
+ {
32
+
815
+ bpf_object__destroy_skeleton(NULL);
33
+ _net_rx_rss_add_chunk(rss_input, bytes_written,
816
+ return 0;
34
+ &udphdr->uh_sport, sizeof(uint16_t));
817
+ }''', dependencies: libbpf)
35
+
818
+ libbpf = not_found
36
+ _net_rx_rss_add_chunk(rss_input, bytes_written,
819
+ if get_option('bpf').enabled()
37
+ &udphdr->uh_dport, sizeof(uint16_t));
820
+ error('libbpf skeleton test failed')
38
+}
821
+ else
39
+
822
+ warning('libbpf skeleton test failed, disabling')
40
uint32_t
823
+ endif
41
net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
824
+endif
42
NetRxPktRssType type,
825
+
43
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
826
if get_option('cfi')
44
trace_net_rx_pkt_rss_ip6_ex();
827
cfi_flags=[]
45
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
828
# Check for dependency on LTO
46
break;
829
@@ -XXX,XX +XXX,XX @@ endif
47
+ case NetPktRssIpV6TcpEx:
830
config_host_data.set('CONFIG_GTK', gtk.found())
48
+ assert(pkt->isip6);
831
config_host_data.set('CONFIG_LIBATTR', have_old_libattr)
49
+ assert(pkt->istcp);
832
config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found())
50
+ trace_net_rx_pkt_rss_ip6_ex_tcp();
833
+config_host_data.set('CONFIG_EBPF', libbpf.found())
51
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
834
config_host_data.set('CONFIG_LIBISCSI', libiscsi.found())
52
+ _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
835
config_host_data.set('CONFIG_LIBNFS', libnfs.found())
53
+ break;
836
config_host_data.set('CONFIG_RBD', rbd.found())
54
+ case NetPktRssIpV4Udp:
837
@@ -XXX,XX +XXX,XX @@ if have_system
55
+ assert(pkt->isip4);
838
'backends',
56
+ assert(pkt->isudp);
839
'backends/tpm',
57
+ trace_net_rx_pkt_rss_ip4_udp();
840
'chardev',
58
+ _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
841
+ 'ebpf',
59
+ _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
842
'hw/9pfs',
60
+ break;
843
'hw/acpi',
61
+ case NetPktRssIpV6Udp:
844
'hw/adc',
62
+ assert(pkt->isip6);
845
@@ -XXX,XX +XXX,XX @@ subdir('accel')
63
+ assert(pkt->isudp);
846
subdir('plugins')
64
+ trace_net_rx_pkt_rss_ip6_udp();
847
subdir('bsd-user')
65
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
848
subdir('linux-user')
66
+ _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
849
+subdir('ebpf')
67
+ break;
850
+
68
+ case NetPktRssIpV6UdpEx:
851
+common_ss.add(libbpf)
69
+ assert(pkt->isip6);
852
70
+ assert(pkt->isudp);
853
bsd_user_ss.add(files('gdbstub.c'))
71
+ trace_net_rx_pkt_rss_ip6_ex_udp();
854
specific_ss.add_all(when: 'CONFIG_BSD_USER', if_true: bsd_user_ss)
72
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
855
@@ -XXX,XX +XXX,XX @@ summary_info += {'RDMA support': config_host.has_key('CONFIG_RDMA')}
73
+ _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
856
summary_info += {'PVRDMA support': config_host.has_key('CONFIG_PVRDMA')}
74
+ break;
857
summary_info += {'fdt support': fdt_opt == 'disabled' ? false : fdt_opt}
75
default:
858
summary_info += {'libcap-ng support': libcap_ng.found()}
76
assert(false);
859
+summary_info += {'bpf support': libbpf.found()}
77
break;
860
# TODO: add back protocol and server version
78
diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h
861
summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')}
862
summary_info += {'rbd support': rbd.found()}
863
diff --git a/meson_options.txt b/meson_options.txt
79
index XXXXXXX..XXXXXXX 100644
864
index XXXXXXX..XXXXXXX 100644
80
--- a/hw/net/net_rx_pkt.h
865
--- a/meson_options.txt
81
+++ b/hw/net/net_rx_pkt.h
866
+++ b/meson_options.txt
82
@@ -XXX,XX +XXX,XX @@ typedef enum {
867
@@ -XXX,XX +XXX,XX @@ option('bzip2', type : 'feature', value : 'auto',
83
NetPktRssIpV4Tcp,
868
description: 'bzip2 support for DMG images')
84
NetPktRssIpV6Tcp,
869
option('cap_ng', type : 'feature', value : 'auto',
85
NetPktRssIpV6,
870
description: 'cap_ng support')
86
- NetPktRssIpV6Ex
871
+option('bpf', type : 'feature', value : 'auto',
87
+ NetPktRssIpV6Ex,
872
+ description: 'eBPF support')
88
+ NetPktRssIpV6TcpEx,
873
option('cocoa', type : 'feature', value : 'auto',
89
+ NetPktRssIpV4Udp,
874
description: 'Cocoa user interface (macOS only)')
90
+ NetPktRssIpV6Udp,
875
option('curl', type : 'feature', value : 'auto',
91
+ NetPktRssIpV6UdpEx,
92
} NetRxPktRssType;
93
94
/**
95
diff --git a/hw/net/trace-events b/hw/net/trace-events
96
index XXXXXXX..XXXXXXX 100644
97
--- a/hw/net/trace-events
98
+++ b/hw/net/trace-events
99
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_l3_csum_validate_csum(size_t l3hdr_off, uint32_t csl, uint32_t cntr,
100
101
net_rx_pkt_rss_ip4(void) "Calculating IPv4 RSS hash"
102
net_rx_pkt_rss_ip4_tcp(void) "Calculating IPv4/TCP RSS hash"
103
+net_rx_pkt_rss_ip4_udp(void) "Calculating IPv4/UDP RSS hash"
104
net_rx_pkt_rss_ip6_tcp(void) "Calculating IPv6/TCP RSS hash"
105
+net_rx_pkt_rss_ip6_udp(void) "Calculating IPv6/UDP RSS hash"
106
net_rx_pkt_rss_ip6(void) "Calculating IPv6 RSS hash"
107
net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash"
108
+net_rx_pkt_rss_ip6_ex_tcp(void) "Calculating IPv6/EX/TCP RSS hash"
109
+net_rx_pkt_rss_ip6_ex_udp(void) "Calculating IPv6/EX/UDP RSS hash"
110
net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X"
111
net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes"
112
113
--
876
--
114
2.5.0
877
2.7.4
115
878
116
879
diff view generated by jsdifflib
1
From: Lukas Straub <lukasstraub2@web.de>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
To switch the Secondary to Primary, we need to insert new filters
3
When RSS is enabled the device tries to load the eBPF program
4
before the filter-rewriter.
4
to select RX virtqueue in the TUN. If eBPF can be loaded
5
5
the RSS will function also with vhost (works with kernel 5.8 and later).
6
Add the options insert= and position= to be able to insert filters
6
Software RSS is used as a fallback with vhost=off when eBPF can't be loaded
7
anywhere in the filter list.
7
or when hash population is requested by the guest.
8
8
9
position should be "head" or "tail" to insert at the head or
9
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
10
tail of the filter list or it should be "id=<id>" to specify
10
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
11
the id of another filter.
12
insert should be either "before" or "behind" to specify where to
13
insert the new filter relative to the one specified with position.
14
15
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
16
Reviewed-by: Zhang Chen <chen.zhang@intel.com>
17
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
---
12
---
19
include/net/filter.h | 2 ++
13
hw/net/vhost_net.c | 3 ++
20
net/filter.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++-
14
hw/net/virtio-net.c | 116 +++++++++++++++++++++++++++++++++++++++--
21
qemu-options.hx | 31 +++++++++++++++---
15
include/hw/virtio/virtio-net.h | 4 ++
22
3 files changed, 119 insertions(+), 6 deletions(-)
16
net/vhost-vdpa.c | 2 +
23
17
4 files changed, 122 insertions(+), 3 deletions(-)
24
diff --git a/include/net/filter.h b/include/net/filter.h
18
25
index XXXXXXX..XXXXXXX 100644
19
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
26
--- a/include/net/filter.h
20
index XXXXXXX..XXXXXXX 100644
27
+++ b/include/net/filter.h
21
--- a/hw/net/vhost_net.c
28
@@ -XXX,XX +XXX,XX @@ struct NetFilterState {
22
+++ b/hw/net/vhost_net.c
29
NetClientState *netdev;
23
@@ -XXX,XX +XXX,XX @@ static const int kernel_feature_bits[] = {
30
NetFilterDirection direction;
24
VIRTIO_NET_F_MTU,
31
bool on;
25
VIRTIO_F_IOMMU_PLATFORM,
32
+ char *position;
26
VIRTIO_F_RING_PACKED,
33
+ bool insert_before_flag;
27
+ VIRTIO_NET_F_HASH_REPORT,
34
QTAILQ_ENTRY(NetFilterState) next;
28
VHOST_INVALID_FEATURE_BIT
35
};
29
};
36
30
37
diff --git a/net/filter.c b/net/filter.c
31
@@ -XXX,XX +XXX,XX @@ static const int user_feature_bits[] = {
38
index XXXXXXX..XXXXXXX 100644
32
VIRTIO_NET_F_MTU,
39
--- a/net/filter.c
33
VIRTIO_F_IOMMU_PLATFORM,
40
+++ b/net/filter.c
34
VIRTIO_F_RING_PACKED,
41
@@ -XXX,XX +XXX,XX @@ static void netfilter_set_status(Object *obj, const char *str, Error **errp)
35
+ VIRTIO_NET_F_RSS,
42
}
36
+ VIRTIO_NET_F_HASH_REPORT,
43
}
37
44
38
/* This bit implies RARP isn't sent by QEMU out of band */
45
+static char *netfilter_get_position(Object *obj, Error **errp)
39
VIRTIO_NET_F_GUEST_ANNOUNCE,
46
+{
40
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
47
+ NetFilterState *nf = NETFILTER(obj);
41
index XXXXXXX..XXXXXXX 100644
48
+
42
--- a/hw/net/virtio-net.c
49
+ return g_strdup(nf->position);
43
+++ b/hw/net/virtio-net.c
50
+}
44
@@ -XXX,XX +XXX,XX @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
51
+
45
return features;
52
+static void netfilter_set_position(Object *obj, const char *str, Error **errp)
46
}
53
+{
47
54
+ NetFilterState *nf = NETFILTER(obj);
48
- virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
55
+
49
- virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
56
+ nf->position = g_strdup(str);
50
+ if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
57
+}
51
+ virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
58
+
52
+ }
59
+static char *netfilter_get_insert(Object *obj, Error **errp)
53
features = vhost_net_get_features(get_vhost_net(nc->peer), features);
60
+{
54
vdev->backend_features = features;
61
+ NetFilterState *nf = NETFILTER(obj);
55
62
+
56
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
63
+ return nf->insert_before_flag ? g_strdup("before") : g_strdup("behind");
57
}
64
+}
58
}
65
+
59
66
+static void netfilter_set_insert(Object *obj, const char *str, Error **errp)
60
+static void virtio_net_detach_epbf_rss(VirtIONet *n);
67
+{
61
+
68
+ NetFilterState *nf = NETFILTER(obj);
62
static void virtio_net_disable_rss(VirtIONet *n)
69
+
70
+ if (strcmp(str, "before") && strcmp(str, "behind")) {
71
+ error_setg(errp, "Invalid value for netfilter insert, "
72
+ "should be 'before' or 'behind'");
73
+ return;
74
+ }
75
+
76
+ nf->insert_before_flag = !strcmp(str, "before");
77
+}
78
+
79
static void netfilter_init(Object *obj)
80
{
63
{
81
NetFilterState *nf = NETFILTER(obj);
64
if (n->rss_data.enabled) {
82
65
trace_virtio_net_rss_disable();
83
nf->on = true;
66
}
84
+ nf->insert_before_flag = false;
67
n->rss_data.enabled = false;
85
+ nf->position = g_strdup("tail");
68
+
86
69
+ virtio_net_detach_epbf_rss(n);
87
object_property_add_str(obj, "netdev",
70
+}
88
netfilter_get_netdev_id, netfilter_set_netdev_id,
71
+
89
@@ -XXX,XX +XXX,XX @@ static void netfilter_init(Object *obj)
72
+static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
90
object_property_add_str(obj, "status",
73
+{
91
netfilter_get_status, netfilter_set_status,
74
+ NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
92
NULL);
75
+ if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
93
+ object_property_add_str(obj, "position",
76
+ return false;
94
+ netfilter_get_position, netfilter_set_position,
77
+ }
95
+ NULL);
78
+
96
+ object_property_add_str(obj, "insert",
79
+ return nc->info->set_steering_ebpf(nc, prog_fd);
97
+ netfilter_get_insert, netfilter_set_insert,
80
+}
98
+ NULL);
81
+
99
}
82
+static void rss_data_to_rss_config(struct VirtioNetRssData *data,
100
83
+ struct EBPFRSSConfig *config)
101
static void netfilter_complete(UserCreatable *uc, Error **errp)
84
+{
102
{
85
+ config->redirect = data->redirect;
103
NetFilterState *nf = NETFILTER(uc);
86
+ config->populate_hash = data->populate_hash;
104
+ NetFilterState *position = NULL;
87
+ config->hash_types = data->hash_types;
105
NetClientState *ncs[MAX_QUEUE_NUM];
88
+ config->indirections_len = data->indirections_len;
106
NetFilterClass *nfc = NETFILTER_GET_CLASS(uc);
89
+ config->default_queue = data->default_queue;
107
int queues;
90
+}
108
@@ -XXX,XX +XXX,XX @@ static void netfilter_complete(UserCreatable *uc, Error **errp)
91
+
109
return;
92
+static bool virtio_net_attach_epbf_rss(VirtIONet *n)
110
}
93
+{
111
94
+ struct EBPFRSSConfig config = {};
112
+ if (strcmp(nf->position, "head") && strcmp(nf->position, "tail")) {
95
+
113
+ Object *container;
96
+ if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
114
+ Object *obj;
97
+ return false;
115
+ char *position_id;
98
+ }
116
+
99
+
117
+ if (!g_str_has_prefix(nf->position, "id=")) {
100
+ rss_data_to_rss_config(&n->rss_data, &config);
118
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "position",
101
+
119
+ "'head', 'tail' or 'id=<id>'");
102
+ if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
120
+ return;
103
+ n->rss_data.indirections_table, n->rss_data.key)) {
104
+ return false;
105
+ }
106
+
107
+ if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
108
+ return false;
109
+ }
110
+
111
+ return true;
112
+}
113
+
114
+static void virtio_net_detach_epbf_rss(VirtIONet *n)
115
+{
116
+ virtio_net_attach_ebpf_to_backend(n->nic, -1);
117
+}
118
+
119
+static bool virtio_net_load_ebpf(VirtIONet *n)
120
+{
121
+ if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
122
+ /* backend does't support steering ebpf */
123
+ return false;
124
+ }
125
+
126
+ return ebpf_rss_load(&n->ebpf_rss);
127
+}
128
+
129
+static void virtio_net_unload_ebpf(VirtIONet *n)
130
+{
131
+ virtio_net_attach_ebpf_to_backend(n->nic, -1);
132
+ ebpf_rss_unload(&n->ebpf_rss);
133
}
134
135
static uint16_t virtio_net_handle_rss(VirtIONet *n,
136
@@ -XXX,XX +XXX,XX @@ static uint16_t virtio_net_handle_rss(VirtIONet *n,
137
goto error;
138
}
139
n->rss_data.enabled = true;
140
+
141
+ if (!n->rss_data.populate_hash) {
142
+ if (!virtio_net_attach_epbf_rss(n)) {
143
+ /* EBPF must be loaded for vhost */
144
+ if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
145
+ warn_report("Can't load eBPF RSS for vhost");
146
+ goto error;
147
+ }
148
+ /* fallback to software RSS */
149
+ warn_report("Can't load eBPF RSS - fallback to software RSS");
150
+ n->rss_data.enabled_software_rss = true;
121
+ }
151
+ }
122
+
152
+ } else {
123
+ /* get the id from the string */
153
+ /* use software RSS for hash populating */
124
+ position_id = g_strndup(nf->position + 3, strlen(nf->position) - 3);
154
+ /* and detach eBPF if was loaded before */
125
+
155
+ virtio_net_detach_epbf_rss(n);
126
+ /* Search for the position to insert before/behind */
156
+ n->rss_data.enabled_software_rss = true;
127
+ container = object_get_objects_root();
157
+ }
128
+ obj = object_resolve_path_component(container, position_id);
158
+
129
+ if (!obj) {
159
trace_virtio_net_rss_enable(n->rss_data.hash_types,
130
+ error_setg(errp, "filter '%s' not found", position_id);
160
n->rss_data.indirections_len,
131
+ g_free(position_id);
161
temp.b);
132
+ return;
162
@@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
163
return -1;
164
}
165
166
- if (!no_rss && n->rss_data.enabled) {
167
+ if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
168
int index = virtio_net_process_rss(nc, buf, size);
169
if (index >= 0) {
170
NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
171
@@ -XXX,XX +XXX,XX @@ static int virtio_net_post_load_device(void *opaque, int version_id)
172
}
173
174
if (n->rss_data.enabled) {
175
+ n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
176
+ if (!n->rss_data.populate_hash) {
177
+ if (!virtio_net_attach_epbf_rss(n)) {
178
+ if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
179
+ warn_report("Can't post-load eBPF RSS for vhost");
180
+ } else {
181
+ warn_report("Can't post-load eBPF RSS - "
182
+ "fallback to software RSS");
183
+ n->rss_data.enabled_software_rss = true;
184
+ }
185
+ }
133
+ }
186
+ }
134
+
187
+
135
+ position = NETFILTER(obj);
188
trace_virtio_net_rss_enable(n->rss_data.hash_types,
136
+
189
n->rss_data.indirections_len,
137
+ if (position->netdev != ncs[0]) {
190
sizeof(n->rss_data.key));
138
+ error_setg(errp, "filter '%s' belongs to a different netdev",
191
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
139
+ position_id);
192
n->qdev = dev;
140
+ g_free(position_id);
193
141
+ return;
194
net_rx_pkt_init(&n->rx_pkt, false);
142
+ }
195
+
143
+
196
+ if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
144
+ g_free(position_id);
197
+ virtio_net_load_ebpf(n);
145
+ }
198
+ }
146
+
199
}
147
nf->netdev = ncs[0];
200
148
201
static void virtio_net_device_unrealize(DeviceState *dev)
149
if (nfc->setup) {
202
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_unrealize(DeviceState *dev)
150
@@ -XXX,XX +XXX,XX @@ static void netfilter_complete(UserCreatable *uc, Error **errp)
203
VirtIONet *n = VIRTIO_NET(dev);
151
return;
204
int i, max_queues;
152
}
205
153
}
206
+ if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
154
- QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next);
207
+ virtio_net_unload_ebpf(n);
155
+
208
+ }
156
+ if (position) {
209
+
157
+ if (nf->insert_before_flag) {
210
/* This will stop vhost backend if appropriate. */
158
+ QTAILQ_INSERT_BEFORE(position, nf, next);
211
virtio_net_set_status(vdev, 0);
159
+ } else {
212
160
+ QTAILQ_INSERT_AFTER(&nf->netdev->filters, position, nf, next);
213
@@ -XXX,XX +XXX,XX @@ static void virtio_net_instance_init(Object *obj)
161
+ }
214
device_add_bootindex_property(obj, &n->nic_conf.bootindex,
162
+ } else if (!strcmp(nf->position, "head")) {
215
"bootindex", "/ethernet-phy@0",
163
+ QTAILQ_INSERT_HEAD(&nf->netdev->filters, nf, next);
216
DEVICE(n));
164
+ } else if (!strcmp(nf->position, "tail")) {
217
+
165
+ QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next);
218
+ ebpf_rss_init(&n->ebpf_rss);
166
+ }
219
}
167
}
220
168
221
static int virtio_net_pre_save(void *opaque)
169
static void netfilter_finalize(Object *obj)
222
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
170
@@ -XXX,XX +XXX,XX @@ static void netfilter_finalize(Object *obj)
223
index XXXXXXX..XXXXXXX 100644
171
QTAILQ_REMOVE(&nf->netdev->filters, nf, next);
224
--- a/include/hw/virtio/virtio-net.h
172
}
225
+++ b/include/hw/virtio/virtio-net.h
173
g_free(nf->netdev_id);
226
@@ -XXX,XX +XXX,XX @@
174
+ g_free(nf->position);
227
#include "qemu/option_int.h"
175
}
228
#include "qom/object.h"
176
229
177
static void default_handle_event(NetFilterState *nf, int event, Error **errp)
230
+#include "ebpf/ebpf_rss.h"
178
diff --git a/qemu-options.hx b/qemu-options.hx
231
+
179
index XXXXXXX..XXXXXXX 100644
232
#define TYPE_VIRTIO_NET "virtio-net-device"
180
--- a/qemu-options.hx
233
OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
181
+++ b/qemu-options.hx
234
182
@@ -XXX,XX +XXX,XX @@ applications, they can do this through this parameter. Its format is
235
@@ -XXX,XX +XXX,XX @@ typedef struct VirtioNetRscChain {
183
a gnutls priority string as described at
236
184
@url{https://gnutls.org/manual/html_node/Priority-Strings.html}.
237
typedef struct VirtioNetRssData {
185
238
bool enabled;
186
-@item -object filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}][,status=@var{on|off}]
239
+ bool enabled_software_rss;
187
+@item -object filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}][,status=@var{on|off}][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
240
bool redirect;
188
241
bool populate_hash;
189
Interval @var{t} can't be 0, this filter batches the packet delivery: all
242
uint32_t hash_types;
190
packets arriving in a given interval on netdev @var{netdevid} are delayed
243
@@ -XXX,XX +XXX,XX @@ struct VirtIONet {
191
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
244
Notifier migration_state;
192
@option{tx}: the filter is attached to the transmit queue of the netdev,
245
VirtioNetRssData rss_data;
193
where it will receive packets sent by the netdev.
246
struct NetRxPkt *rx_pkt;
194
247
+ struct EBPFRSSContext ebpf_rss;
195
-@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
248
};
196
+position @var{head|tail|id=<id>} is an option to specify where the
249
197
+filter should be inserted in the filter list. It can be applied to any
250
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
198
+netfilter.
251
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
199
+
252
index XXXXXXX..XXXXXXX 100644
200
+@option{head}: the filter is inserted at the head of the filter
253
--- a/net/vhost-vdpa.c
201
+ list, before any existing filters.
254
+++ b/net/vhost-vdpa.c
202
+
255
@@ -XXX,XX +XXX,XX @@ const int vdpa_feature_bits[] = {
203
+@option{tail}: the filter is inserted at the tail of the filter
256
VIRTIO_NET_F_MTU,
204
+ list, behind any existing filters (default).
257
VIRTIO_F_IOMMU_PLATFORM,
205
+
258
VIRTIO_F_RING_PACKED,
206
+@option{id=<id>}: the filter is inserted before or behind the filter
259
+ VIRTIO_NET_F_RSS,
207
+ specified by <id>, see the insert option below.
260
+ VIRTIO_NET_F_HASH_REPORT,
208
+
261
VIRTIO_NET_F_GUEST_ANNOUNCE,
209
+insert @var{behind|before} is an option to specify where to insert the
262
VIRTIO_NET_F_STATUS,
210
+new filter relative to the one specified with position=id=<id>. It can
263
VHOST_INVALID_FEATURE_BIT
211
+be applied to any netfilter.
212
+
213
+@option{before}: insert before the specified filter.
214
+
215
+@option{behind}: insert behind the specified filter (default).
216
+
217
+@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
218
219
filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
220
221
-@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
222
+@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
223
224
filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev
225
@var{chardevid},and redirect indev's packet to filter.if it has the vnet_hdr_support flag,
226
@@ -XXX,XX +XXX,XX @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
227
be the same. we can just use indev or outdev, but at least one of indev or outdev
228
need to be specified.
229
230
-@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support]
231
+@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
232
233
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
234
secondary from primary to keep secondary tcp connection,and rewrite
235
@@ -XXX,XX +XXX,XX @@ colo secondary:
236
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
237
-object filter-rewriter,id=rew0,netdev=hn0,queue=all
238
239
-@item -object filter-dump,id=@var{id},netdev=@var{dev}[,file=@var{filename}][,maxlen=@var{len}]
240
+@item -object filter-dump,id=@var{id},netdev=@var{dev}[,file=@var{filename}][,maxlen=@var{len}][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
241
242
Dump the network traffic on netdev @var{dev} to the file specified by
243
@var{filename}. At most @var{len} bytes (64k by default) per packet are stored.
244
--
264
--
245
2.5.0
265
2.7.4
246
266
247
267
diff view generated by jsdifflib
1
From: Lukas Straub <lukasstraub2@web.de>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
2
3
Document the qemu command-line and qmp commands for continuous replication
3
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
4
4
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
5
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
6
---
8
docs/COLO-FT.txt | 224 +++++++++++++++++++++++++++++++++------------
7
docs/devel/ebpf_rss.rst | 125 ++++++++++++++++++++++++++++++++++++++++++++++++
9
docs/block-replication.txt | 28 ++++--
8
docs/devel/index.rst | 1 +
10
2 files changed, 184 insertions(+), 68 deletions(-)
9
2 files changed, 126 insertions(+)
10
create mode 100644 docs/devel/ebpf_rss.rst
11
11
12
diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt
12
diff --git a/docs/devel/ebpf_rss.rst b/docs/devel/ebpf_rss.rst
13
index XXXXXXX..XXXXXXX 100644
13
new file mode 100644
14
--- a/docs/COLO-FT.txt
14
index XXXXXXX..XXXXXXX
15
+++ b/docs/COLO-FT.txt
15
--- /dev/null
16
@@ -XXX,XX +XXX,XX @@ The diagram just shows the main qmp command, you can get the detail
16
+++ b/docs/devel/ebpf_rss.rst
17
in test procedure.
17
@@ -XXX,XX +XXX,XX @@
18
18
+===========================
19
== Test procedure ==
19
+eBPF RSS virtio-net support
20
-1. Startup qemu
20
+===========================
21
-Primary:
22
-# qemu-system-x86_64 -accel kvm -m 2048 -smp 2 -qmp stdio -name primary \
23
- -device piix3-usb-uhci -vnc :7 \
24
- -device usb-tablet -netdev tap,id=hn0,vhost=off \
25
- -device virtio-net-pci,id=net-pci0,netdev=hn0 \
26
- -drive if=virtio,id=primary-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
27
- children.0.file.filename=1.raw,\
28
- children.0.driver=raw -S
29
-Secondary:
30
-# qemu-system-x86_64 -accel kvm -m 2048 -smp 2 -qmp stdio -name secondary \
31
- -device piix3-usb-uhci -vnc :7 \
32
- -device usb-tablet -netdev tap,id=hn0,vhost=off \
33
- -device virtio-net-pci,id=net-pci0,netdev=hn0 \
34
- -drive if=none,id=secondary-disk0,file.filename=1.raw,driver=raw,node-name=node0 \
35
- -drive if=virtio,id=active-disk0,driver=replication,mode=secondary,\
36
- file.driver=qcow2,top-id=active-disk0,\
37
- file.file.filename=/mnt/ramfs/active_disk.img,\
38
- file.backing.driver=qcow2,\
39
- file.backing.file.filename=/mnt/ramfs/hidden_disk.img,\
40
- file.backing.backing=secondary-disk0 \
41
- -incoming tcp:0:8888
42
-
43
-2. On Secondary VM's QEMU monitor, issue command
44
+Note: Here we are running both instances on the same host for testing,
45
+change the IP Addresses if you want to run it on two hosts. Initially
46
+127.0.0.1 is the Primary Host and 127.0.0.2 is the Secondary Host.
47
+
21
+
48
+== Startup qemu ==
22
+RSS(Receive Side Scaling) is used to distribute network packets to guest virtqueues
49
+1. Primary:
23
+by calculating packet hash. Usually every queue is processed then by a specific guest CPU core.
50
+Note: Initially, $imagefolder/primary.qcow2 needs to be copied to all hosts.
51
+You don't need to change any IP's here, because 0.0.0.0 listens on any
52
+interface. The chardev's with 127.0.0.1 IP's loopback to the local qemu
53
+instance.
54
+
24
+
55
+# imagefolder="/mnt/vms/colo-test-primary"
25
+For now there are 2 RSS implementations in qemu:
26
+- 'in-qemu' RSS (functions if qemu receives network packets, i.e. vhost=off)
27
+- eBPF RSS (can also function with vhost=on)
56
+
28
+
57
+# qemu-system-x86_64 -enable-kvm -cpu qemu64,+kvmclock -m 512 -smp 1 -qmp stdio \
29
+eBPF support (CONFIG_EBPF) is enabled by 'configure' script.
58
+ -device piix3-usb-uhci -device usb-tablet -name primary \
30
+To enable eBPF RSS support use './configure --enable-bpf'.
59
+ -netdev tap,id=hn0,vhost=off,helper=/usr/lib/qemu/qemu-bridge-helper \
60
+ -device rtl8139,id=e0,netdev=hn0 \
61
+ -chardev socket,id=mirror0,host=0.0.0.0,port=9003,server,nowait \
62
+ -chardev socket,id=compare1,host=0.0.0.0,port=9004,server,wait \
63
+ -chardev socket,id=compare0,host=127.0.0.1,port=9001,server,nowait \
64
+ -chardev socket,id=compare0-0,host=127.0.0.1,port=9001 \
65
+ -chardev socket,id=compare_out,host=127.0.0.1,port=9005,server,nowait \
66
+ -chardev socket,id=compare_out0,host=127.0.0.1,port=9005 \
67
+ -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 \
68
+ -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out \
69
+ -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 \
70
+ -object iothread,id=iothread1 \
71
+ -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,\
72
+outdev=compare_out0,iothread=iothread1 \
73
+ -drive if=ide,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
74
+children.0.file.filename=$imagefolder/primary.qcow2,children.0.driver=qcow2 -S
75
+
31
+
76
+2. Secondary:
32
+If steering BPF is not set for kernel's TUN module, the TUN uses automatic selection
77
+Note: Active and hidden images need to be created only once and the
33
+of rx virtqueue based on lookup table built according to calculated symmetric hash
78
+size should be the same as primary.qcow2. Again, you don't need to change
34
+of transmitted packets.
79
+any IP's here, except for the $primary_ip variable.
35
+If steering BPF is set for TUN the BPF code calculates the hash of packet header and
36
+returns the virtqueue number to place the packet to.
80
+
37
+
81
+# imagefolder="/mnt/vms/colo-test-secondary"
38
+Simplified decision formula:
82
+# primary_ip=127.0.0.1
83
+
39
+
84
+# qemu-img create -f qcow2 $imagefolder/secondary-active.qcow2 10G
40
+.. code:: C
85
+
41
+
86
+# qemu-img create -f qcow2 $imagefolder/secondary-hidden.qcow2 10G
42
+ queue_index = indirection_table[hash(<packet data>)%<indirection_table size>]
87
+
88
+# qemu-system-x86_64 -enable-kvm -cpu qemu64,+kvmclock -m 512 -smp 1 -qmp stdio \
89
+ -device piix3-usb-uhci -device usb-tablet -name secondary \
90
+ -netdev tap,id=hn0,vhost=off,helper=/usr/lib/qemu/qemu-bridge-helper \
91
+ -device rtl8139,id=e0,netdev=hn0 \
92
+ -chardev socket,id=red0,host=$primary_ip,port=9003,reconnect=1 \
93
+ -chardev socket,id=red1,host=$primary_ip,port=9004,reconnect=1 \
94
+ -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 \
95
+ -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 \
96
+ -object filter-rewriter,id=rew0,netdev=hn0,queue=all \
97
+ -drive if=none,id=parent0,file.filename=$imagefolder/primary.qcow2,driver=qcow2 \
98
+ -drive if=none,id=childs0,driver=replication,mode=secondary,file.driver=qcow2,\
99
+top-id=colo-disk0,file.file.filename=$imagefolder/secondary-active.qcow2,\
100
+file.backing.driver=qcow2,file.backing.file.filename=$imagefolder/secondary-hidden.qcow2,\
101
+file.backing.backing=parent0 \
102
+ -drive if=ide,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
103
+children.0=childs0 \
104
+ -incoming tcp:0.0.0.0:9998
105
+
43
+
106
+
44
+
107
+3. On Secondary VM's QEMU monitor, issue command
45
+The hash cannot (or should not) be calculated for every packet.
108
{'execute':'qmp_capabilities'}
109
-{ 'execute': 'nbd-server-start',
110
- 'arguments': {'addr': {'type': 'inet', 'data': {'host': 'xx.xx.xx.xx', 'port': '8889'} } }
111
-}
112
-{'execute': 'nbd-server-add', 'arguments': {'device': 'secondary-disk0', 'writable': true } }
113
+{'execute': 'nbd-server-start', 'arguments': {'addr': {'type': 'inet', 'data': {'host': '0.0.0.0', 'port': '9999'} } } }
114
+{'execute': 'nbd-server-add', 'arguments': {'device': 'parent0', 'writable': true } }
115
116
Note:
117
a. The qmp command nbd-server-start and nbd-server-add must be run
118
before running the qmp command migrate on primary QEMU
119
b. Active disk, hidden disk and nbd target's length should be the
120
same.
121
- c. It is better to put active disk and hidden disk in ramdisk.
122
+ c. It is better to put active disk and hidden disk in ramdisk. They
123
+ will be merged into the parent disk on failover.
124
125
-3. On Primary VM's QEMU monitor, issue command:
126
+4. On Primary VM's QEMU monitor, issue command:
127
{'execute':'qmp_capabilities'}
128
-{ 'execute': 'human-monitor-command',
129
- 'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xx.xx.xx.xx,file.port=8889,file.export=secondary-disk0,node-name=nbd_client0'}}
130
-{ 'execute':'x-blockdev-change', 'arguments':{'parent': 'primary-disk0', 'node': 'nbd_client0' } }
131
-{ 'execute': 'migrate-set-capabilities',
132
- 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
133
-{ 'execute': 'migrate', 'arguments': {'uri': 'tcp:xx.xx.xx.xx:8888' } }
134
+{'execute': 'human-monitor-command', 'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}}
135
+{'execute': 'x-blockdev-change', 'arguments':{'parent': 'colo-disk0', 'node': 'replication0' } }
136
+{'execute': 'migrate-set-capabilities', 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
137
+{'execute': 'migrate', 'arguments': {'uri': 'tcp:127.0.0.2:9998' } }
138
139
Note:
140
a. There should be only one NBD Client for each primary disk.
141
- b. xx.xx.xx.xx is the secondary physical machine's hostname or IP
142
- c. The qmp command line must be run after running qmp command line in
143
+ b. The qmp command line must be run after running qmp command line in
144
secondary qemu.
145
146
-4. After the above steps, you will see, whenever you make changes to PVM, SVM will be synced.
147
+5. After the above steps, you will see, whenever you make changes to PVM, SVM will be synced.
148
You can issue command '{ "execute": "migrate-set-parameters" , "arguments":{ "x-checkpoint-delay": 2000 } }'
149
-to change the checkpoint period time
150
+to change the idle checkpoint period time
151
+
46
+
152
+6. Failover test
47
+Note: currently, eBPF RSS does not support hash reporting.
153
+You can kill one of the VMs and Failover on the surviving VM:
154
+
48
+
155
+If you killed the Secondary, then follow "Primary Failover". After that,
49
+eBPF RSS is turned on by different combinations of vhost-net, virtio-net and tap configurations:
156
+if you want to resume the replication, follow "Primary resume replication"
157
+
50
+
158
+If you killed the Primary, then follow "Secondary Failover". After that,
51
+- eBPF is used:
159
+if you want to resume the replication, follow "Secondary resume replication"
160
+
52
+
161
+== Primary Failover ==
53
+ tap,vhost=off & virtio-net-pci,rss=on,hash=off
162
+The Secondary died, resume on the Primary
163
+
54
+
164
+{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'child': 'children.1'} }
55
+- eBPF is used:
165
+{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_del replication0' } }
166
+{'execute': 'object-del', 'arguments':{ 'id': 'comp0' } }
167
+{'execute': 'object-del', 'arguments':{ 'id': 'iothread1' } }
168
+{'execute': 'object-del', 'arguments':{ 'id': 'm0' } }
169
+{'execute': 'object-del', 'arguments':{ 'id': 'redire0' } }
170
+{'execute': 'object-del', 'arguments':{ 'id': 'redire1' } }
171
+{'execute': 'x-colo-lost-heartbeat' }
172
+
56
+
173
+== Secondary Failover ==
57
+ tap,vhost=on & virtio-net-pci,rss=on,hash=off
174
+The Primary died, resume on the Secondary and prepare to become the new Primary
175
+
58
+
176
+{'execute': 'nbd-server-stop'}
59
+- 'in-qemu' RSS is used:
177
+{'execute': 'x-colo-lost-heartbeat'}
178
+
60
+
179
+{'execute': 'object-del', 'arguments':{ 'id': 'f2' } }
61
+ tap,vhost=off & virtio-net-pci,rss=on,hash=on
180
+{'execute': 'object-del', 'arguments':{ 'id': 'f1' } }
181
+{'execute': 'chardev-remove', 'arguments':{ 'id': 'red1' } }
182
+{'execute': 'chardev-remove', 'arguments':{ 'id': 'red0' } }
183
+
62
+
184
+{'execute': 'chardev-add', 'arguments':{ 'id': 'mirror0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9003' } }, 'server': true } } } }
63
+- eBPF is used, hash population feature is not reported to the guest:
185
+{'execute': 'chardev-add', 'arguments':{ 'id': 'compare1', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9004' } }, 'server': true } } } }
186
+{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': true } } } }
187
+{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0-0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': false } } } }
188
+{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': true } } } }
189
+{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': false } } } }
190
+
64
+
191
+== Primary resume replication ==
65
+ tap,vhost=on & virtio-net-pci,rss=on,hash=on
192
+Resume replication after new Secondary is up.
193
+
66
+
194
+Start the new Secondary (Steps 2 and 3 above), then on the Primary:
67
+If CONFIG_EBPF is not set then only 'in-qemu' RSS is supported.
195
+{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.2:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} }
68
+Also 'in-qemu' RSS, as a fallback, is used if the eBPF program failed to load or set to TUN.
196
+
69
+
197
+Wait until disk is synced, then:
70
+RSS eBPF program
198
+{'execute': 'stop'}
71
+----------------
199
+{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync'} }
200
+
72
+
201
+{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}}
73
+RSS program located in ebpf/rss.bpf.skeleton.h generated by bpftool.
202
+{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } }
74
+So the program is part of the qemu binary.
75
+Initially, the eBPF program was compiled by clang and source code located at tools/ebpf/rss.bpf.c.
76
+Prerequisites to recompile the eBPF program (regenerate ebpf/rss.bpf.skeleton.h):
203
+
77
+
204
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } }
78
+ llvm, clang, kernel source tree, bpftool
205
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } }
79
+ Adjust Makefile.ebpf to reflect the location of the kernel source tree
206
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } }
207
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } }
208
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } }
209
+
80
+
210
+{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
81
+ $ cd tools/ebpf
211
+{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.2:9998' } }
82
+ $ make -f Makefile.ebpf
212
+
83
+
213
+Note:
84
+The current eBPF RSS implementation uses 'bounded loops' with 'backward jump instructions', which are present only in recent kernels.
214
+If this Primary previously was a Secondary, then we need to insert the
85
+Overall eBPF RSS works on kernels 5.8+.
215
+filters before the filter-rewriter by using the
216
+"'insert': 'before', 'position': 'id=rew0'" Options. See below.
217
+
86
+
218
+== Secondary resume replication ==
87
+eBPF RSS implementation
219
+Become Primary and resume replication after new Secondary is up. Note
88
+-----------------------
220
+that now 127.0.0.1 is the Secondary and 127.0.0.2 is the Primary.
221
+
89
+
222
+Start the new Secondary (Steps 2 and 3 above, but with primary_ip=127.0.0.2),
90
+eBPF RSS loading functionality located in ebpf/ebpf_rss.c and ebpf/ebpf_rss.h.
223
+then on the old Secondary:
224
+{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.1:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} }
225
+
91
+
226
+Wait until disk is synced, then:
92
+The `struct EBPFRSSContext` structure that holds 4 file descriptors:
227
+{'execute': 'stop'}
93
+
228
+{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync' } }
94
+- ctx - pointer of the libbpf context.
229
95
+- program_fd - file descriptor of the eBPF RSS program.
230
-5. Failover test
96
+- map_configuration - file descriptor of the 'configuration' map. This map contains one element of 'struct EBPFRSSConfig'. This configuration determines eBPF program behavior.
231
-You can kill Primary VM and run 'x_colo_lost_heartbeat' in Secondary VM's
97
+- map_toeplitz_key - file descriptor of the 'Toeplitz key' map. One element of the 40byte key prepared for the hashing algorithm.
232
-monitor at the same time, then SVM will failover and client will not detect this
98
+- map_indirections_table - 128 elements of queue indexes.
233
-change.
99
+
234
+{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.1,file.port=9999,file.export=parent0,node-name=replication0'}}
100
+`struct EBPFRSSConfig` fields:
235
+{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } }
101
+
236
102
+- redirect - "boolean" value, should the hash be calculated, on false - `default_queue` would be used as the final decision.
237
-Before issuing '{ "execute": "x-colo-lost-heartbeat" }' command, we have to
103
+- populate_hash - for now, not used. eBPF RSS doesn't support hash reporting.
238
-issue block related command to stop block replication.
104
+- hash_types - binary mask of different hash types. See `VIRTIO_NET_RSS_HASH_TYPE_*` defines. If for packet hash should not be calculated - `default_queue` would be used.
239
-Primary:
105
+- indirections_len - length of the indirections table, maximum 128.
240
- Remove the nbd child from the quorum:
106
+- default_queue - the queue index that is used for packets that shouldn't be hashed. For some packets, the hash can't be calculated (e.g. ARP).
241
- { 'execute': 'x-blockdev-change', 'arguments': {'parent': 'colo-disk0', 'child': 'children.1'}}
107
+
242
- { 'execute': 'human-monitor-command','arguments': {'command-line': 'drive_del blk-buddy0'}}
108
+Functions:
243
- Note: there is no qmp command to remove the blockdev now
109
+
244
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } }
110
+- `ebpf_rss_init()` - sets ctx to NULL, which indicates that EBPFRSSContext is not loaded.
245
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } }
111
+- `ebpf_rss_load()` - creates 3 maps and loads eBPF program from the rss.bpf.skeleton.h. Returns 'true' on success. After that, program_fd can be used to set steering for TAP.
246
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } }
112
+- `ebpf_rss_set_all()` - sets values for eBPF maps. `indirections_table` length is in EBPFRSSConfig. `toeplitz_key` is VIRTIO_NET_RSS_MAX_KEY_SIZE aka 40 bytes array.
247
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } }
113
+- `ebpf_rss_unload()` - close all file descriptors and set ctx to NULL.
248
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } }
114
+
249
115
+Simplified eBPF RSS workflow:
250
-Secondary:
116
+
251
- The primary host is down, so we should do the following thing:
117
+.. code:: C
252
- { 'execute': 'nbd-server-stop' }
118
+
253
+{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
119
+ struct EBPFRSSConfig config;
254
+{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.1:9998' } }
120
+ config.redirect = 1;
255
121
+ config.hash_types = VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4;
256
== TODO ==
122
+ config.indirections_len = VIRTIO_NET_RSS_MAX_TABLE_LEN;
257
-1. Support continuous VM replication.
123
+ config.default_queue = 0;
258
-2. Support shared storage.
124
+
259
-3. Develop the heartbeat part.
125
+ uint16_t table[VIRTIO_NET_RSS_MAX_TABLE_LEN] = {...};
260
-4. Reduce checkpoint VM’s downtime while doing checkpoint.
126
+ uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {...};
261
+1. Support shared storage.
127
+
262
+2. Develop the heartbeat part.
128
+ struct EBPFRSSContext ctx;
263
+3. Reduce checkpoint VM’s downtime while doing checkpoint.
129
+ ebpf_rss_init(&ctx);
264
diff --git a/docs/block-replication.txt b/docs/block-replication.txt
130
+ ebpf_rss_load(&ctx);
265
index XXXXXXX..XXXXXXX 100644
131
+ ebpf_rss_set_all(&ctx, &config, table, key);
266
--- a/docs/block-replication.txt
132
+ if (net_client->info->set_steering_ebpf != NULL) {
267
+++ b/docs/block-replication.txt
133
+ net_client->info->set_steering_ebpf(net_client, ctx.program_fd);
268
@@ -XXX,XX +XXX,XX @@ blocks that are already in QEMU.
134
+ }
269
^ || .----------
135
+ ...
270
| || | Secondary
136
+ ebpf_rss_unload(&ctx);
271
1 Quorum || '----------
272
- / \ ||
273
- / \ ||
274
- Primary 2 filter
275
- disk ^ virtio-blk
276
- | ^
277
- 3 NBD -------> 3 NBD |
278
+ / \ || virtio-blk
279
+ / \ || ^
280
+ Primary 2 filter |
281
+ disk ^ 7 Quorum
282
+ | /
283
+ 3 NBD -------> 3 NBD /
284
client || server 2 filter
285
|| ^ ^
286
--------. || | |
287
@@ -XXX,XX +XXX,XX @@ any state that would otherwise be lost by the speculative write-through
288
of the NBD server into the secondary disk. So before block replication,
289
the primary disk and secondary disk should contain the same data.
290
291
+7) The secondary also has a quorum node, so after secondary failover it
292
+can become the new primary and continue replication.
293
+
137
+
294
+
138
+
295
== Failure Handling ==
139
+NetClientState SetSteeringEBPF()
296
There are 7 internal errors when block replication is running:
140
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
297
1. I/O error on primary disk
298
@@ -XXX,XX +XXX,XX @@ Primary:
299
leading whitespace.
300
5. The qmp command line must be run after running qmp command line in
301
secondary qemu.
302
- 6. After failover we need remove children.1 (replication driver).
303
+ 6. After primary failover we need remove children.1 (replication driver).
304
305
Secondary:
306
-drive if=none,driver=raw,file.filename=1.raw,id=colo1 \
307
- -drive if=xxx,id=topxxx,driver=replication,mode=secondary,top-id=topxxx\
308
+ -drive if=none,id=childs1,driver=replication,mode=secondary,top-id=childs1
309
file.file.filename=active_disk.qcow2,\
310
file.driver=qcow2,\
311
file.backing.file.filename=hidden_disk.qcow2,\
312
file.backing.driver=qcow2,\
313
file.backing.backing=colo1
314
+ -drive if=xxx,driver=quorum,read-pattern=fifo,id=top-disk1,\
315
+ vote-threshold=1,children.0=childs1
316
317
Then run qmp command in secondary qemu:
318
{ 'execute': 'nbd-server-start',
319
@@ -XXX,XX +XXX,XX @@ Secondary:
320
The primary host is down, so we should do the following thing:
321
{ 'execute': 'nbd-server-stop' }
322
323
+Promote Secondary to Primary:
324
+ see COLO-FT.txt
325
+
141
+
326
TODO:
142
+For now, the `set_steering_ebpf()` method is supported by Linux TAP NetClientState. The method requires an eBPF program file descriptor as an argument.
327
-1. Continuous block replication
143
diff --git a/docs/devel/index.rst b/docs/devel/index.rst
328
-2. Shared disk
144
index XXXXXXX..XXXXXXX 100644
329
+1. Shared disk
145
--- a/docs/devel/index.rst
146
+++ b/docs/devel/index.rst
147
@@ -XXX,XX +XXX,XX @@ Contents:
148
qom
149
block-coroutine-wrapper
150
multi-process
151
+ ebpf_rss
330
--
152
--
331
2.5.0
153
2.7.4
332
154
333
155
diff view generated by jsdifflib
1
From: Yuri Benditovich <yuri.benditovich@daynix.com>
1
From: Andrew Melnychenko <andrew@daynix.com>
2
3
When requested to calculate the hash for TCPV6 packet,
4
ignore overrides of source and destination addresses
5
in extension headers.
6
Use these overrides when new hash type NetPktRssIpV6TcpEx
7
requested.
8
Use this type in e1000e hash calculation for IPv6 TCP, which
9
should take in account overrides of the addresses.
10
2
11
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
3
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
12
Acked-by: Dmitry Fleytman <dmitry.fleytman@gmail.com>
4
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
6
---
15
hw/net/e1000e_core.c | 2 +-
7
MAINTAINERS | 8 ++++++++
16
hw/net/net_rx_pkt.c | 2 +-
8
1 file changed, 8 insertions(+)
17
2 files changed, 2 insertions(+), 2 deletions(-)
18
9
19
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
10
diff --git a/MAINTAINERS b/MAINTAINERS
20
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/net/e1000e_core.c
12
--- a/MAINTAINERS
22
+++ b/hw/net/e1000e_core.c
13
+++ b/MAINTAINERS
23
@@ -XXX,XX +XXX,XX @@ e1000e_rss_calc_hash(E1000ECore *core,
14
@@ -XXX,XX +XXX,XX @@ F: include/hw/remote/proxy-memory-listener.h
24
type = NetPktRssIpV4Tcp;
15
F: hw/remote/iohub.c
25
break;
16
F: include/hw/remote/iohub.h
26
case E1000_MRQ_RSS_TYPE_IPV6TCP:
17
27
- type = NetPktRssIpV6Tcp;
18
+EBPF:
28
+ type = NetPktRssIpV6TcpEx;
19
+M: Jason Wang <jasowang@redhat.com>
29
break;
20
+R: Andrew Melnychenko <andrew@daynix.com>
30
case E1000_MRQ_RSS_TYPE_IPV6:
21
+R: Yuri Benditovich <yuri.benditovich@daynix.com>
31
type = NetPktRssIpV6;
22
+S: Maintained
32
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
23
+F: ebpf/*
33
index XXXXXXX..XXXXXXX 100644
24
+F: tools/ebpf/*
34
--- a/hw/net/net_rx_pkt.c
25
+
35
+++ b/hw/net/net_rx_pkt.c
26
Build and test automation
36
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
27
-------------------------
37
assert(pkt->isip6);
28
Build and test automation, general continuous integration
38
assert(pkt->istcp);
39
trace_net_rx_pkt_rss_ip6_tcp();
40
- _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
41
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
42
_net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
43
break;
44
case NetPktRssIpV6:
45
--
29
--
46
2.5.0
30
2.7.4
47
31
48
32
diff view generated by jsdifflib
Deleted patch
1
From: Bin Meng <bmeng.cn@gmail.com>
2
1
3
When CADENCE_GEM_ERR_DEBUG is turned on, there are several
4
compilation errors in DB_PRINT(). Fix them.
5
6
While we are here, update to use appropriate modifiers in
7
the same DB_PRINT() call.
8
9
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
10
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
hw/net/cadence_gem.c | 11 ++++++-----
14
1 file changed, 6 insertions(+), 5 deletions(-)
15
16
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/net/cadence_gem.c
19
+++ b/hw/net/cadence_gem.c
20
@@ -XXX,XX +XXX,XX @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size)
21
return -1;
22
}
23
24
- DB_PRINT("copy %d bytes to 0x%x\n", MIN(bytes_to_copy, rxbufsize),
25
- rx_desc_get_buffer(s->rx_desc[q]));
26
+ DB_PRINT("copy %u bytes to 0x%" PRIx64 "\n",
27
+ MIN(bytes_to_copy, rxbufsize),
28
+ rx_desc_get_buffer(s, s->rx_desc[q]));
29
30
/* Copy packet data to emulated DMA buffer */
31
address_space_write(&s->dma_as, rx_desc_get_buffer(s, s->rx_desc[q]) +
32
@@ -XXX,XX +XXX,XX @@ static void gem_transmit(CadenceGEMState *s)
33
34
if (tx_desc_get_length(desc) > sizeof(tx_packet) -
35
(p - tx_packet)) {
36
- DB_PRINT("TX descriptor @ 0x%x too large: size 0x%x space " \
37
- "0x%x\n", (unsigned)packet_desc_addr,
38
- (unsigned)tx_desc_get_length(desc),
39
+ DB_PRINT("TX descriptor @ 0x%" HWADDR_PRIx \
40
+ " too large: size 0x%x space 0x%zx\n",
41
+ packet_desc_addr, tx_desc_get_length(desc),
42
sizeof(tx_packet) - (p - tx_packet));
43
break;
44
}
45
--
46
2.5.0
47
48
diff view generated by jsdifflib
Deleted patch
1
From: Lukas Straub <lukasstraub2@web.de>
2
1
3
After failover the Secondary side of replication shouldn't change state, because
4
it now functions as our primary disk.
5
6
In replication_start, replication_do_checkpoint, replication_stop, ignore
7
the request if current state is BLOCK_REPLICATION_DONE (successful failover) or
8
BLOCK_REPLICATION_FAILOVER (failover in progress i.e. currently merging active
9
and hidden images into the base image).
10
11
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
12
Reviewed-by: Zhang Chen <chen.zhang@intel.com>
13
Acked-by: Max Reitz <mreitz@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
block/replication.c | 35 ++++++++++++++++++++++++++++++++++-
17
1 file changed, 34 insertions(+), 1 deletion(-)
18
19
diff --git a/block/replication.c b/block/replication.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/replication.c
22
+++ b/block/replication.c
23
@@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
24
aio_context_acquire(aio_context);
25
s = bs->opaque;
26
27
+ if (s->stage == BLOCK_REPLICATION_DONE ||
28
+ s->stage == BLOCK_REPLICATION_FAILOVER) {
29
+ /*
30
+ * This case happens when a secondary is promoted to primary.
31
+ * Ignore the request because the secondary side of replication
32
+ * doesn't have to do anything anymore.
33
+ */
34
+ aio_context_release(aio_context);
35
+ return;
36
+ }
37
+
38
if (s->stage != BLOCK_REPLICATION_NONE) {
39
error_setg(errp, "Block replication is running or done");
40
aio_context_release(aio_context);
41
@@ -XXX,XX +XXX,XX @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
42
aio_context_acquire(aio_context);
43
s = bs->opaque;
44
45
+ if (s->stage == BLOCK_REPLICATION_DONE ||
46
+ s->stage == BLOCK_REPLICATION_FAILOVER) {
47
+ /*
48
+ * This case happens when a secondary was promoted to primary.
49
+ * Ignore the request because the secondary side of replication
50
+ * doesn't have to do anything anymore.
51
+ */
52
+ aio_context_release(aio_context);
53
+ return;
54
+ }
55
+
56
if (s->mode == REPLICATION_MODE_SECONDARY) {
57
secondary_do_checkpoint(s, errp);
58
}
59
@@ -XXX,XX +XXX,XX @@ static void replication_get_error(ReplicationState *rs, Error **errp)
60
aio_context_acquire(aio_context);
61
s = bs->opaque;
62
63
- if (s->stage != BLOCK_REPLICATION_RUNNING) {
64
+ if (s->stage == BLOCK_REPLICATION_NONE) {
65
error_setg(errp, "Block replication is not running");
66
aio_context_release(aio_context);
67
return;
68
@@ -XXX,XX +XXX,XX @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
69
aio_context_acquire(aio_context);
70
s = bs->opaque;
71
72
+ if (s->stage == BLOCK_REPLICATION_DONE ||
73
+ s->stage == BLOCK_REPLICATION_FAILOVER) {
74
+ /*
75
+ * This case happens when a secondary was promoted to primary.
76
+ * Ignore the request because the secondary side of replication
77
+ * doesn't have to do anything anymore.
78
+ */
79
+ aio_context_release(aio_context);
80
+ return;
81
+ }
82
+
83
if (s->stage != BLOCK_REPLICATION_RUNNING) {
84
error_setg(errp, "Block replication is not running");
85
aio_context_release(aio_context);
86
--
87
2.5.0
88
89
diff view generated by jsdifflib
Deleted patch
1
From: Lukas Straub <lukasstraub2@web.de>
2
1
3
This simulates the case that happens when we resume COLO after failover.
4
5
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
8
tests/test-replication.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++
9
1 file changed, 52 insertions(+)
10
11
diff --git a/tests/test-replication.c b/tests/test-replication.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tests/test-replication.c
14
+++ b/tests/test-replication.c
15
@@ -XXX,XX +XXX,XX @@ static void test_secondary_stop(void)
16
teardown_secondary();
17
}
18
19
+static void test_secondary_continuous_replication(void)
20
+{
21
+ BlockBackend *top_blk, *local_blk;
22
+ Error *local_err = NULL;
23
+
24
+ top_blk = start_secondary();
25
+ replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
26
+ g_assert(!local_err);
27
+
28
+ /* write 0x22 to s_local_disk (IMG_SIZE / 2, IMG_SIZE) */
29
+ local_blk = blk_by_name(S_LOCAL_DISK_ID);
30
+ test_blk_write(local_blk, 0x22, IMG_SIZE / 2, IMG_SIZE / 2, false);
31
+
32
+ /* replication will backup s_local_disk to s_hidden_disk */
33
+ test_blk_read(top_blk, 0x11, IMG_SIZE / 2,
34
+ IMG_SIZE / 2, 0, IMG_SIZE, false);
35
+
36
+ /* write 0x33 to s_active_disk (0, IMG_SIZE / 2) */
37
+ test_blk_write(top_blk, 0x33, 0, IMG_SIZE / 2, false);
38
+
39
+ /* do failover (active commit) */
40
+ replication_stop_all(true, &local_err);
41
+ g_assert(!local_err);
42
+
43
+ /* it should ignore all requests from now on */
44
+
45
+ /* start after failover */
46
+ replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
47
+ g_assert(!local_err);
48
+
49
+ /* checkpoint */
50
+ replication_do_checkpoint_all(&local_err);
51
+ g_assert(!local_err);
52
+
53
+ /* stop */
54
+ replication_stop_all(true, &local_err);
55
+ g_assert(!local_err);
56
+
57
+ /* read from s_local_disk (0, IMG_SIZE / 2) */
58
+ test_blk_read(top_blk, 0x33, 0, IMG_SIZE / 2,
59
+ 0, IMG_SIZE / 2, false);
60
+
61
+
62
+ /* read from s_local_disk (IMG_SIZE / 2, IMG_SIZE) */
63
+ test_blk_read(top_blk, 0x22, IMG_SIZE / 2,
64
+ IMG_SIZE / 2, 0, IMG_SIZE, false);
65
+
66
+ teardown_secondary();
67
+}
68
+
69
static void test_secondary_do_checkpoint(void)
70
{
71
BlockBackend *top_blk, *local_blk;
72
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
73
g_test_add_func("/replication/secondary/write", test_secondary_write);
74
g_test_add_func("/replication/secondary/start", test_secondary_start);
75
g_test_add_func("/replication/secondary/stop", test_secondary_stop);
76
+ g_test_add_func("/replication/secondary/continuous_replication",
77
+ test_secondary_continuous_replication);
78
g_test_add_func("/replication/secondary/do_checkpoint",
79
test_secondary_do_checkpoint);
80
g_test_add_func("/replication/secondary/get_error_all",
81
--
82
2.5.0
83
84
diff view generated by jsdifflib