1
The following changes since commit e0175b71638cf4398903c0d25f93fe62e0606389:
1
The following changes since commit d90f154867ec0ec22fd719164b88716e8fd48672:
2
2
3
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200228' into staging (2020-02-28 16:39:27 +0000)
3
Merge remote-tracking branch 'remotes/dg-gitlab/tags/ppc-for-6.1-20210504' into staging (2021-05-05 20:29:14 +0100)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to 41aa2e3f9b27fd259a13711545d933a20f1d2f16:
9
for you to fetch changes up to 4f8a39494aded9f2026a26b137378ea2ee3d5338:
10
10
11
l2tpv3: fix RFC number typo in qemu-options.hx (2020-03-02 15:30:08 +0800)
11
tap-bsd: Remove special casing for older OpenBSD releases (2021-05-27 11:03:55 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
----------------------------------------------------------------
15
----------------------------------------------------------------
16
Bin Meng (1):
16
Brad Smith (1):
17
hw: net: cadence_gem: Fix build errors in DB_PRINT()
17
tap-bsd: Remove special casing for older OpenBSD releases
18
18
19
Finn Thain (14):
19
Guenter Roeck (1):
20
dp8393x: Mask EOL bit from descriptor addresses
20
hw/net/imx_fec: return 0xffff when accessing non-existing PHY
21
dp8393x: Always use 32-bit accesses
22
dp8393x: Clean up endianness hacks
23
dp8393x: Have dp8393x_receive() return the packet size
24
dp8393x: Update LLFA and CRDA registers from rx descriptor
25
dp8393x: Clear RRRA command register bit only when appropriate
26
dp8393x: Implement packet size limit and RBAE interrupt
27
dp8393x: Don't clobber packet checksum
28
dp8393x: Use long-word-aligned RRA pointers in 32-bit mode
29
dp8393x: Pad frames to word or long word boundary
30
dp8393x: Clear descriptor in_use field to release packet
31
dp8393x: Always update RRA pointers and sequence numbers
32
dp8393x: Don't reset Silicon Revision register
33
dp8393x: Don't stop reception upon RBE interrupt assertion
34
21
35
Lukas Straub (4):
22
Laurent Vivier (1):
36
block/replication.c: Ignore requests after failover
23
virtio-net: failover: add missing remove_migration_state_change_notifier()
37
tests/test-replication.c: Add test for secondary node continuing replication
38
net/filter.c: Add Options to insert filters anywhere in the filter list
39
colo: Update Documentation for continuous replication
40
24
41
Stefan Hajnoczi (1):
25
hw/net/imx_fec.c | 8 +++-----
42
l2tpv3: fix RFC number typo in qemu-options.hx
26
hw/net/trace-events | 2 ++
43
27
hw/net/virtio-net.c | 1 +
44
Yuri Benditovich (3):
28
net/tap-bsd.c | 8 --------
45
e1000e: Avoid hw_error if legacy mode used
29
4 files changed, 6 insertions(+), 13 deletions(-)
46
NetRxPkt: Introduce support for additional hash types
47
NetRxPkt: fix hash calculation of IPV6 TCP
48
49
block/replication.c | 35 ++++++-
50
docs/COLO-FT.txt | 224 +++++++++++++++++++++++++++++++++------------
51
docs/block-replication.txt | 28 ++++--
52
hw/net/cadence_gem.c | 11 ++-
53
hw/net/dp8393x.c | 200 ++++++++++++++++++++++++++--------------
54
hw/net/e1000e_core.c | 15 +--
55
hw/net/net_rx_pkt.c | 44 ++++++++-
56
hw/net/net_rx_pkt.h | 6 +-
57
hw/net/trace-events | 4 +
58
include/net/filter.h | 2 +
59
net/filter.c | 92 ++++++++++++++++++-
60
qemu-options.hx | 35 +++++--
61
tests/test-replication.c | 52 +++++++++++
62
13 files changed, 591 insertions(+), 157 deletions(-)
63
30
64
31
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
The least significant bit of a descriptor address register is used as
4
an EOL flag. It has to be masked when the register value is to be used
5
as an actual address for copying memory around. But when the registers
6
are to be updated the EOL bit should not be masked.
7
8
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
9
Tested-by: Laurent Vivier <laurent@vivier.eu>
10
---
11
hw/net/dp8393x.c | 17 +++++++++++------
12
1 file changed, 11 insertions(+), 6 deletions(-)
13
14
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/dp8393x.c
17
+++ b/hw/net/dp8393x.c
18
@@ -XXX,XX +XXX,XX @@ do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
19
#define SONIC_ISR_PINT 0x0800
20
#define SONIC_ISR_LCD 0x1000
21
22
+#define SONIC_DESC_EOL 0x0001
23
+#define SONIC_DESC_ADDR 0xFFFE
24
+
25
#define TYPE_DP8393X "dp8393x"
26
#define DP8393X(obj) OBJECT_CHECK(dp8393xState, (obj), TYPE_DP8393X)
27
28
@@ -XXX,XX +XXX,XX @@ static uint32_t dp8393x_crba(dp8393xState *s)
29
30
static uint32_t dp8393x_crda(dp8393xState *s)
31
{
32
- return (s->regs[SONIC_URDA] << 16) | s->regs[SONIC_CRDA];
33
+ return (s->regs[SONIC_URDA] << 16) |
34
+ (s->regs[SONIC_CRDA] & SONIC_DESC_ADDR);
35
}
36
37
static uint32_t dp8393x_rbwc(dp8393xState *s)
38
@@ -XXX,XX +XXX,XX @@ static uint32_t dp8393x_tsa(dp8393xState *s)
39
40
static uint32_t dp8393x_ttda(dp8393xState *s)
41
{
42
- return (s->regs[SONIC_UTDA] << 16) | s->regs[SONIC_TTDA];
43
+ return (s->regs[SONIC_UTDA] << 16) |
44
+ (s->regs[SONIC_TTDA] & SONIC_DESC_ADDR);
45
}
46
47
static uint32_t dp8393x_wt(dp8393xState *s)
48
@@ -XXX,XX +XXX,XX @@ static void dp8393x_do_transmit_packets(dp8393xState *s)
49
MEMTXATTRS_UNSPECIFIED, s->data,
50
size);
51
s->regs[SONIC_CTDA] = dp8393x_get(s, width, 0) & ~0x1;
52
- if (dp8393x_get(s, width, 0) & 0x1) {
53
+ if (dp8393x_get(s, width, 0) & SONIC_DESC_EOL) {
54
/* EOL detected */
55
break;
56
}
57
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
58
/* XXX: Check byte ordering */
59
60
/* Check for EOL */
61
- if (s->regs[SONIC_LLFA] & 0x1) {
62
+ if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
63
/* Are we still in resource exhaustion? */
64
size = sizeof(uint16_t) * 1 * width;
65
address = dp8393x_crda(s) + sizeof(uint16_t) * 5 * width;
66
address_space_read(&s->as, address, MEMTXATTRS_UNSPECIFIED,
67
s->data, size);
68
- if (dp8393x_get(s, width, 0) & 0x1) {
69
+ if (dp8393x_get(s, width, 0) & SONIC_DESC_EOL) {
70
/* Still EOL ; stop reception */
71
return -1;
72
} else {
73
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
74
dp8393x_crda(s) + sizeof(uint16_t) * 5 * width,
75
MEMTXATTRS_UNSPECIFIED, s->data, size);
76
s->regs[SONIC_LLFA] = dp8393x_get(s, width, 0);
77
- if (s->regs[SONIC_LLFA] & 0x1) {
78
+ if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
79
/* EOL detected */
80
s->regs[SONIC_ISR] |= SONIC_ISR_RDE;
81
} else {
82
--
83
2.5.0
84
85
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
The DP83932 and DP83934 have 32 data lines. The datasheet says,
4
5
Data Bus: These bidirectional lines are used to transfer data on the
6
system bus. When the SONIC is a bus master, 16-bit data is transferred
7
on D15-D0 and 32-bit data is transferred on D31-D0. When the SONIC is
8
accessed as a slave, register data is driven onto lines D15-D0.
9
D31-D16 are held TRI-STATE if SONIC is in 16-bit mode. If SONIC is in
10
32-bit mode, they are driven, but invalid.
11
12
Always use 32-bit accesses both as bus master and bus slave.
13
14
Force the MSW to zero in bus master mode.
15
16
This gets the Linux 'jazzsonic' driver working, and avoids the need for
17
prior hacks to make the NetBSD 'sn' driver work.
18
19
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
20
Tested-by: Laurent Vivier <laurent@vivier.eu>
21
Signed-off-by: Jason Wang <jasowang@redhat.com>
22
---
23
hw/net/dp8393x.c | 47 +++++++++++++++++++++++++++++------------------
24
1 file changed, 29 insertions(+), 18 deletions(-)
25
26
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/net/dp8393x.c
29
+++ b/hw/net/dp8393x.c
30
@@ -XXX,XX +XXX,XX @@ static void dp8393x_put(dp8393xState *s, int width, int offset,
31
uint16_t val)
32
{
33
if (s->big_endian) {
34
- s->data[offset * width + width - 1] = cpu_to_be16(val);
35
+ if (width == 2) {
36
+ s->data[offset * 2] = 0;
37
+ s->data[offset * 2 + 1] = cpu_to_be16(val);
38
+ } else {
39
+ s->data[offset] = cpu_to_be16(val);
40
+ }
41
} else {
42
- s->data[offset * width] = cpu_to_le16(val);
43
+ if (width == 2) {
44
+ s->data[offset * 2] = cpu_to_le16(val);
45
+ s->data[offset * 2 + 1] = 0;
46
+ } else {
47
+ s->data[offset] = cpu_to_le16(val);
48
+ }
49
}
50
}
51
52
@@ -XXX,XX +XXX,XX @@ static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size)
53
54
DPRINTF("read 0x%04x from reg %s\n", val, reg_names[reg]);
55
56
- return val;
57
+ return s->big_endian ? val << 16 : val;
58
}
59
60
static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
61
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
62
{
63
dp8393xState *s = opaque;
64
int reg = addr >> s->it_shift;
65
+ uint32_t val = s->big_endian ? data >> 16 : data;
66
67
- DPRINTF("write 0x%04x to reg %s\n", (uint16_t)data, reg_names[reg]);
68
+ DPRINTF("write 0x%04x to reg %s\n", (uint16_t)val, reg_names[reg]);
69
70
switch (reg) {
71
/* Command register */
72
case SONIC_CR:
73
- dp8393x_do_command(s, data);
74
+ dp8393x_do_command(s, val);
75
break;
76
/* Prevent write to read-only registers */
77
case SONIC_CAP2:
78
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
79
/* Accept write to some registers only when in reset mode */
80
case SONIC_DCR:
81
if (s->regs[SONIC_CR] & SONIC_CR_RST) {
82
- s->regs[reg] = data & 0xbfff;
83
+ s->regs[reg] = val & 0xbfff;
84
} else {
85
DPRINTF("writing to DCR invalid\n");
86
}
87
break;
88
case SONIC_DCR2:
89
if (s->regs[SONIC_CR] & SONIC_CR_RST) {
90
- s->regs[reg] = data & 0xf017;
91
+ s->regs[reg] = val & 0xf017;
92
} else {
93
DPRINTF("writing to DCR2 invalid\n");
94
}
95
break;
96
/* 12 lower bytes are Read Only */
97
case SONIC_TCR:
98
- s->regs[reg] = data & 0xf000;
99
+ s->regs[reg] = val & 0xf000;
100
break;
101
/* 9 lower bytes are Read Only */
102
case SONIC_RCR:
103
- s->regs[reg] = data & 0xffe0;
104
+ s->regs[reg] = val & 0xffe0;
105
break;
106
/* Ignore most significant bit */
107
case SONIC_IMR:
108
- s->regs[reg] = data & 0x7fff;
109
+ s->regs[reg] = val & 0x7fff;
110
dp8393x_update_irq(s);
111
break;
112
/* Clear bits by writing 1 to them */
113
case SONIC_ISR:
114
- data &= s->regs[reg];
115
- s->regs[reg] &= ~data;
116
- if (data & SONIC_ISR_RBE) {
117
+ val &= s->regs[reg];
118
+ s->regs[reg] &= ~val;
119
+ if (val & SONIC_ISR_RBE) {
120
dp8393x_do_read_rra(s);
121
}
122
dp8393x_update_irq(s);
123
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
124
case SONIC_REA:
125
case SONIC_RRP:
126
case SONIC_RWP:
127
- s->regs[reg] = data & 0xfffe;
128
+ s->regs[reg] = val & 0xfffe;
129
break;
130
/* Invert written value for some registers */
131
case SONIC_CRCT:
132
case SONIC_FAET:
133
case SONIC_MPT:
134
- s->regs[reg] = data ^ 0xffff;
135
+ s->regs[reg] = val ^ 0xffff;
136
break;
137
/* All other registers have no special contrainst */
138
default:
139
- s->regs[reg] = data;
140
+ s->regs[reg] = val;
141
}
142
143
if (reg == SONIC_WT0 || reg == SONIC_WT1) {
144
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
145
static const MemoryRegionOps dp8393x_ops = {
146
.read = dp8393x_read,
147
.write = dp8393x_write,
148
- .impl.min_access_size = 2,
149
- .impl.max_access_size = 2,
150
+ .impl.min_access_size = 4,
151
+ .impl.max_access_size = 4,
152
.endianness = DEVICE_NATIVE_ENDIAN,
153
};
154
155
--
156
2.5.0
157
158
diff view generated by jsdifflib
1
From: Yuri Benditovich <yuri.benditovich@daynix.com>
1
From: Guenter Roeck <linux@roeck-us.net>
2
2
3
Add support for following hash types:
3
If a PHY does not exist, attempts to read from it should return 0xffff.
4
IPV6 TCP with extension headers
4
Otherwise the Linux kernel will believe that a PHY is there and select
5
IPV4 UDP
5
the non-existing PHY. This in turn will result in network errors later
6
IPV6 UDP
6
on since the real PHY is not selected or configured.
7
IPV6 UDP with extension headers
8
7
9
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
8
Since reading from or writing to a non-existing PHY is not an emulation
10
Acked-by: Dmitry Fleytman <dmitry.fleytman@gmail.com>
9
error, replace guest error messages with traces.
10
11
Fixes: 461c51ad4275 ("Add a phy-num property to the i.MX FEC emulator")
12
Cc: Jean-Christophe Dubois <jcd@tribudubois.net>
13
Reviewed-by: Bin Meng <bmeng.cn@gmail.com>
14
Tested-by: Bin Meng <bmeng.cn@gmail.com>
15
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
16
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
18
---
13
hw/net/net_rx_pkt.c | 42 ++++++++++++++++++++++++++++++++++++++++++
19
hw/net/imx_fec.c | 8 +++-----
14
hw/net/net_rx_pkt.h | 6 +++++-
20
hw/net/trace-events | 2 ++
15
hw/net/trace-events | 4 ++++
21
2 files changed, 5 insertions(+), 5 deletions(-)
16
3 files changed, 51 insertions(+), 1 deletion(-)
17
22
18
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
23
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
19
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/net/net_rx_pkt.c
25
--- a/hw/net/imx_fec.c
21
+++ b/hw/net/net_rx_pkt.c
26
+++ b/hw/net/imx_fec.c
22
@@ -XXX,XX +XXX,XX @@ _net_rx_rss_prepare_tcp(uint8_t *rss_input,
27
@@ -XXX,XX +XXX,XX @@ static uint32_t imx_phy_read(IMXFECState *s, int reg)
23
&tcphdr->th_dport, sizeof(uint16_t));
28
uint32_t phy = reg / 32;
24
}
29
25
30
if (phy != s->phy_num) {
26
+static inline void
31
- qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad phy num %u\n",
27
+_net_rx_rss_prepare_udp(uint8_t *rss_input,
32
- TYPE_IMX_FEC, __func__, phy);
28
+ struct NetRxPkt *pkt,
33
- return 0;
29
+ size_t *bytes_written)
34
+ trace_imx_phy_read_num(phy, s->phy_num);
30
+{
35
+ return 0xffff;
31
+ struct udp_header *udphdr = &pkt->l4hdr_info.hdr.udp;
36
}
32
+
37
33
+ _net_rx_rss_add_chunk(rss_input, bytes_written,
38
reg %= 32;
34
+ &udphdr->uh_sport, sizeof(uint16_t));
39
@@ -XXX,XX +XXX,XX @@ static void imx_phy_write(IMXFECState *s, int reg, uint32_t val)
35
+
40
uint32_t phy = reg / 32;
36
+ _net_rx_rss_add_chunk(rss_input, bytes_written,
41
37
+ &udphdr->uh_dport, sizeof(uint16_t));
42
if (phy != s->phy_num) {
38
+}
43
- qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad phy num %u\n",
39
+
44
- TYPE_IMX_FEC, __func__, phy);
40
uint32_t
45
+ trace_imx_phy_write_num(phy, s->phy_num);
41
net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
46
return;
42
NetRxPktRssType type,
47
}
43
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
48
44
trace_net_rx_pkt_rss_ip6_ex();
45
_net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
46
break;
47
+ case NetPktRssIpV6TcpEx:
48
+ assert(pkt->isip6);
49
+ assert(pkt->istcp);
50
+ trace_net_rx_pkt_rss_ip6_ex_tcp();
51
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
52
+ _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
53
+ break;
54
+ case NetPktRssIpV4Udp:
55
+ assert(pkt->isip4);
56
+ assert(pkt->isudp);
57
+ trace_net_rx_pkt_rss_ip4_udp();
58
+ _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
59
+ _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
60
+ break;
61
+ case NetPktRssIpV6Udp:
62
+ assert(pkt->isip6);
63
+ assert(pkt->isudp);
64
+ trace_net_rx_pkt_rss_ip6_udp();
65
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
66
+ _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
67
+ break;
68
+ case NetPktRssIpV6UdpEx:
69
+ assert(pkt->isip6);
70
+ assert(pkt->isudp);
71
+ trace_net_rx_pkt_rss_ip6_ex_udp();
72
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
73
+ _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
74
+ break;
75
default:
76
assert(false);
77
break;
78
diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h
79
index XXXXXXX..XXXXXXX 100644
80
--- a/hw/net/net_rx_pkt.h
81
+++ b/hw/net/net_rx_pkt.h
82
@@ -XXX,XX +XXX,XX @@ typedef enum {
83
NetPktRssIpV4Tcp,
84
NetPktRssIpV6Tcp,
85
NetPktRssIpV6,
86
- NetPktRssIpV6Ex
87
+ NetPktRssIpV6Ex,
88
+ NetPktRssIpV6TcpEx,
89
+ NetPktRssIpV4Udp,
90
+ NetPktRssIpV6Udp,
91
+ NetPktRssIpV6UdpEx,
92
} NetRxPktRssType;
93
94
/**
95
diff --git a/hw/net/trace-events b/hw/net/trace-events
49
diff --git a/hw/net/trace-events b/hw/net/trace-events
96
index XXXXXXX..XXXXXXX 100644
50
index XXXXXXX..XXXXXXX 100644
97
--- a/hw/net/trace-events
51
--- a/hw/net/trace-events
98
+++ b/hw/net/trace-events
52
+++ b/hw/net/trace-events
99
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_l3_csum_validate_csum(size_t l3hdr_off, uint32_t csl, uint32_t cntr,
53
@@ -XXX,XX +XXX,XX @@ i82596_channel_attention(void *s) "%p: Received CHANNEL ATTENTION"
100
54
101
net_rx_pkt_rss_ip4(void) "Calculating IPv4 RSS hash"
55
# imx_fec.c
102
net_rx_pkt_rss_ip4_tcp(void) "Calculating IPv4/TCP RSS hash"
56
imx_phy_read(uint32_t val, int phy, int reg) "0x%04"PRIx32" <= phy[%d].reg[%d]"
103
+net_rx_pkt_rss_ip4_udp(void) "Calculating IPv4/UDP RSS hash"
57
+imx_phy_read_num(int phy, int configured) "read request from unconfigured phy %d (configured %d)"
104
net_rx_pkt_rss_ip6_tcp(void) "Calculating IPv6/TCP RSS hash"
58
imx_phy_write(uint32_t val, int phy, int reg) "0x%04"PRIx32" => phy[%d].reg[%d]"
105
+net_rx_pkt_rss_ip6_udp(void) "Calculating IPv6/UDP RSS hash"
59
+imx_phy_write_num(int phy, int configured) "write request to unconfigured phy %d (configured %d)"
106
net_rx_pkt_rss_ip6(void) "Calculating IPv6 RSS hash"
60
imx_phy_update_link(const char *s) "%s"
107
net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash"
61
imx_phy_reset(void) ""
108
+net_rx_pkt_rss_ip6_ex_tcp(void) "Calculating IPv6/EX/TCP RSS hash"
62
imx_fec_read_bd(uint64_t addr, int flags, int len, int data) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x"
109
+net_rx_pkt_rss_ip6_ex_udp(void) "Calculating IPv6/EX/UDP RSS hash"
110
net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X"
111
net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes"
112
113
--
63
--
114
2.5.0
64
2.7.4
115
65
116
66
diff view generated by jsdifflib
1
From: Finn Thain <fthain@telegraphics.com.au>
1
From: Laurent Vivier <lvivier@redhat.com>
2
2
3
According to the datasheet, section 3.4.4, "in 32-bit mode ... the SONIC
3
In the failover case configuration, virtio_net_device_realize() uses an
4
always writes long words".
4
add_migration_state_change_notifier() to add a state notifier, but this
5
notifier is not removed by the unrealize function when the virtio-net
6
card is unplugged.
5
7
6
Therefore, use the same technique for the 'in_use' field that is used
8
If the card is unplugged and a migration is started, the notifier is
7
everywhere else, and write the full long word.
9
called and as it is not valid anymore QEMU crashes.
8
10
9
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
11
This patch fixes the problem by adding the
10
Tested-by: Laurent Vivier <laurent@vivier.eu>
12
remove_migration_state_change_notifier() in virtio_net_device_unrealize().
11
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
13
14
The problem can be reproduced with:
15
16
$ qemu-system-x86_64 -enable-kvm -m 1g -M q35 \
17
-device pcie-root-port,slot=4,id=root1 \
18
-device pcie-root-port,slot=5,id=root2 \
19
-device virtio-net-pci,id=net1,mac=52:54:00:6f:55:cc,failover=on,bus=root1 \
20
-monitor stdio disk.qcow2
21
(qemu) device_del net1
22
(qemu) migrate "exec:gzip -c > STATEFILE.gz"
23
24
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
25
0x0000000000000000 in ?? ()
26
(gdb) bt
27
#0 0x0000000000000000 in ()
28
#1 0x0000555555d726d7 in notifier_list_notify (...)
29
at .../util/notify.c:39
30
#2 0x0000555555842c1a in migrate_fd_connect (...)
31
at .../migration/migration.c:3975
32
#3 0x0000555555950f7d in migration_channel_connect (...)
33
error@entry=0x0) at .../migration/channel.c:107
34
#4 0x0000555555910922 in exec_start_outgoing_migration (...)
35
at .../migration/exec.c:42
36
37
Reported-by: Igor Mammedov <imammedo@redhat.com>
38
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
39
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
40
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
41
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
42
---
14
hw/net/dp8393x.c | 17 ++++++-----------
43
hw/net/virtio-net.c | 1 +
15
1 file changed, 6 insertions(+), 11 deletions(-)
44
1 file changed, 1 insertion(+)
16
45
17
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
46
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
18
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/dp8393x.c
48
--- a/hw/net/virtio-net.c
20
+++ b/hw/net/dp8393x.c
49
+++ b/hw/net/virtio-net.c
21
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
50
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_unrealize(DeviceState *dev)
22
return -1;
51
52
if (n->failover) {
53
device_listener_unregister(&n->primary_listener);
54
+ remove_migration_state_change_notifier(&n->migration_state);
23
}
55
}
24
56
25
- /* XXX: Check byte ordering */
57
max_queues = n->multiqueue ? n->max_queues : 1;
26
-
27
/* Check for EOL */
28
if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
29
/* Are we still in resource exhaustion? */
30
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
31
/* EOL detected */
32
s->regs[SONIC_ISR] |= SONIC_ISR_RDE;
33
} else {
34
- /* Clear in_use, but it is always 16bit wide */
35
- int offset = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width;
36
- if (s->big_endian && width == 2) {
37
- /* we need to adjust the offset of the 16bit field */
38
- offset += sizeof(uint16_t);
39
- }
40
- s->data[0] = 0;
41
- address_space_write(&s->as, offset, MEMTXATTRS_UNSPECIFIED,
42
- s->data, sizeof(uint16_t));
43
+ /* Clear in_use */
44
+ size = sizeof(uint16_t) * width;
45
+ address = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width;
46
+ dp8393x_put(s, width, 0, 0);
47
+ address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
48
+ s->data, size);
49
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
50
s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
51
s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) | (((s->regs[SONIC_RSC] & 0x00ff) + 1) & 0x00ff);
52
--
58
--
53
2.5.0
59
2.7.4
54
60
55
61
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
This function re-uses its 'size' argument as a scratch variable.
4
Instead, declare a local 'size' variable for that purpose so that the
5
function result doesn't get messed up.
6
7
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Tested-by: Laurent Vivier <laurent@vivier.eu>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
hw/net/dp8393x.c | 9 +++++----
13
1 file changed, 5 insertions(+), 4 deletions(-)
14
15
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/dp8393x.c
18
+++ b/hw/net/dp8393x.c
19
@@ -XXX,XX +XXX,XX @@ static int dp8393x_receive_filter(dp8393xState *s, const uint8_t * buf,
20
}
21
22
static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
23
- size_t size)
24
+ size_t pkt_size)
25
{
26
dp8393xState *s = qemu_get_nic_opaque(nc);
27
int packet_type;
28
uint32_t available, address;
29
- int width, rx_len = size;
30
+ int width, rx_len = pkt_size;
31
uint32_t checksum;
32
+ int size;
33
34
width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1;
35
36
s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER |
37
SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC);
38
39
- packet_type = dp8393x_receive_filter(s, buf, size);
40
+ packet_type = dp8393x_receive_filter(s, buf, pkt_size);
41
if (packet_type < 0) {
42
DPRINTF("packet not for netcard\n");
43
return -1;
44
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
45
/* Done */
46
dp8393x_update_irq(s);
47
48
- return size;
49
+ return pkt_size;
50
}
51
52
static void dp8393x_reset(DeviceState *dev)
53
--
54
2.5.0
55
56
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
Follow the algorithm given in the National Semiconductor DP83932C
4
datasheet in section 3.4.7:
5
6
At the next reception, the SONIC re-reads the last RXpkt.link field,
7
and updates its CRDA register to point to the next descriptor.
8
9
The chip is designed to allow the host to provide a new list of
10
descriptors in this way.
11
12
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
13
Tested-by: Laurent Vivier <laurent@vivier.eu>
14
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
---
17
hw/net/dp8393x.c | 11 +++++++----
18
1 file changed, 7 insertions(+), 4 deletions(-)
19
20
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/net/dp8393x.c
23
+++ b/hw/net/dp8393x.c
24
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
25
address = dp8393x_crda(s) + sizeof(uint16_t) * 5 * width;
26
address_space_read(&s->as, address, MEMTXATTRS_UNSPECIFIED,
27
s->data, size);
28
- if (dp8393x_get(s, width, 0) & SONIC_DESC_EOL) {
29
+ s->regs[SONIC_LLFA] = dp8393x_get(s, width, 0);
30
+ if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) {
31
/* Still EOL ; stop reception */
32
return -1;
33
- } else {
34
- s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
35
}
36
+ /* Link has been updated by host */
37
+ s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
38
}
39
40
/* Save current position */
41
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
42
MEMTXATTRS_UNSPECIFIED,
43
s->data, size);
44
45
- /* Move to next descriptor */
46
+ /* Check link field */
47
size = sizeof(uint16_t) * width;
48
address_space_read(&s->as,
49
dp8393x_crda(s) + sizeof(uint16_t) * 5 * width,
50
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
51
dp8393x_put(s, width, 0, 0);
52
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
53
s->data, size);
54
+
55
+ /* Move to next descriptor */
56
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
57
s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
58
s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) | (((s->regs[SONIC_RSC] & 0x00ff) + 1) & 0x00ff);
59
--
60
2.5.0
61
62
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
It doesn't make sense to clear the command register bit unless the
4
command was actually issued.
5
6
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Tested-by: Laurent Vivier <laurent@vivier.eu>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/net/dp8393x.c | 7 +++----
12
1 file changed, 3 insertions(+), 4 deletions(-)
13
14
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/dp8393x.c
17
+++ b/hw/net/dp8393x.c
18
@@ -XXX,XX +XXX,XX @@ static void dp8393x_do_read_rra(dp8393xState *s)
19
s->regs[SONIC_ISR] |= SONIC_ISR_RBE;
20
dp8393x_update_irq(s);
21
}
22
-
23
- /* Done */
24
- s->regs[SONIC_CR] &= ~SONIC_CR_RRRA;
25
}
26
27
static void dp8393x_do_software_reset(dp8393xState *s)
28
@@ -XXX,XX +XXX,XX @@ static void dp8393x_do_command(dp8393xState *s, uint16_t command)
29
dp8393x_do_start_timer(s);
30
if (command & SONIC_CR_RST)
31
dp8393x_do_software_reset(s);
32
- if (command & SONIC_CR_RRRA)
33
+ if (command & SONIC_CR_RRRA) {
34
dp8393x_do_read_rra(s);
35
+ s->regs[SONIC_CR] &= ~SONIC_CR_RRRA;
36
+ }
37
if (command & SONIC_CR_LCAM)
38
dp8393x_do_load_cam(s);
39
}
40
--
41
2.5.0
42
43
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
Add a bounds check to prevent a large packet from causing a buffer
4
overflow. This is defensive programming -- I haven't actually tried
5
sending an oversized packet or a jumbo ethernet frame.
6
7
The SONIC handles packets that are too big for the buffer by raising
8
the RBAE interrupt and dropping them. Linux uses that interrupt to
9
count dropped packets.
10
11
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
12
Tested-by: Laurent Vivier <laurent@vivier.eu>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
hw/net/dp8393x.c | 9 +++++++++
16
1 file changed, 9 insertions(+)
17
18
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/net/dp8393x.c
21
+++ b/hw/net/dp8393x.c
22
@@ -XXX,XX +XXX,XX @@ do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0)
23
#define SONIC_TCR_CRCI 0x2000
24
#define SONIC_TCR_PINT 0x8000
25
26
+#define SONIC_ISR_RBAE 0x0010
27
#define SONIC_ISR_RBE 0x0020
28
#define SONIC_ISR_RDE 0x0040
29
#define SONIC_ISR_TC 0x0080
30
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
31
s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER |
32
SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC);
33
34
+ if (pkt_size + 4 > dp8393x_rbwc(s) * 2) {
35
+ DPRINTF("oversize packet, pkt_size is %d\n", pkt_size);
36
+ s->regs[SONIC_ISR] |= SONIC_ISR_RBAE;
37
+ dp8393x_update_irq(s);
38
+ dp8393x_do_read_rra(s);
39
+ return pkt_size;
40
+ }
41
+
42
packet_type = dp8393x_receive_filter(s, buf, pkt_size);
43
if (packet_type < 0) {
44
DPRINTF("packet not for netcard\n");
45
--
46
2.5.0
47
48
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
A received packet consumes pkt_size bytes in the buffer and the frame
4
checksum that's appended to it consumes another 4 bytes. The Receive
5
Buffer Address register takes the former quantity into account but
6
not the latter. So the next packet written to the buffer overwrites
7
the frame checksum. Fix this.
8
9
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
10
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Tested-by: Laurent Vivier <laurent@vivier.eu>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/dp8393x.c | 1 +
15
1 file changed, 1 insertion(+)
16
17
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/dp8393x.c
20
+++ b/hw/net/dp8393x.c
21
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
22
address += rx_len;
23
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
24
&checksum, 4);
25
+ address += 4;
26
rx_len += 4;
27
s->regs[SONIC_CRBA1] = address >> 16;
28
s->regs[SONIC_CRBA0] = address & 0xffff;
29
--
30
2.5.0
31
32
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
Section 3.4.1 of the datasheet says,
4
5
The alignment of the RRA is confined to either word or long word
6
boundaries, depending upon the data width mode. In 16-bit mode,
7
the RRA must be aligned to a word boundary (A0 is always zero)
8
and in 32-bit mode, the RRA is aligned to a long word boundary
9
(A0 and A1 are always zero).
10
11
This constraint has been implemented for 16-bit mode; implement it
12
for 32-bit mode too.
13
14
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
15
Tested-by: Laurent Vivier <laurent@vivier.eu>
16
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
17
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
---
19
hw/net/dp8393x.c | 8 ++++++--
20
1 file changed, 6 insertions(+), 2 deletions(-)
21
22
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/hw/net/dp8393x.c
25
+++ b/hw/net/dp8393x.c
26
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
27
qemu_flush_queued_packets(qemu_get_queue(s->nic));
28
}
29
break;
30
- /* Ignore least significant bit */
31
+ /* The guest is required to store aligned pointers here */
32
case SONIC_RSA:
33
case SONIC_REA:
34
case SONIC_RRP:
35
case SONIC_RWP:
36
- s->regs[reg] = val & 0xfffe;
37
+ if (s->regs[SONIC_DCR] & SONIC_DCR_DW) {
38
+ s->regs[reg] = val & 0xfffc;
39
+ } else {
40
+ s->regs[reg] = val & 0xfffe;
41
+ }
42
break;
43
/* Invert written value for some registers */
44
case SONIC_CRCT:
45
--
46
2.5.0
47
48
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
The existing code has a bug where the Remaining Buffer Word Count (RBWC)
4
is calculated with a truncating division, which gives the wrong result
5
for odd-sized packets.
6
7
Section 1.4.1 of the datasheet says,
8
9
Once the end of the packet has been reached, the serializer will
10
fill out the last word (16-bit mode) or long word (32-bit mode)
11
if the last byte did not end on a word or long word boundary
12
respectively. The fill byte will be 0FFh.
13
14
Implement buffer padding so that buffer limits are correctly enforced.
15
16
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
17
Tested-by: Laurent Vivier <laurent@vivier.eu>
18
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
19
Signed-off-by: Jason Wang <jasowang@redhat.com>
20
---
21
hw/net/dp8393x.c | 39 ++++++++++++++++++++++++++++-----------
22
1 file changed, 28 insertions(+), 11 deletions(-)
23
24
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/net/dp8393x.c
27
+++ b/hw/net/dp8393x.c
28
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
29
dp8393xState *s = qemu_get_nic_opaque(nc);
30
int packet_type;
31
uint32_t available, address;
32
- int width, rx_len = pkt_size;
33
+ int width, rx_len, padded_len;
34
uint32_t checksum;
35
int size;
36
37
- width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1;
38
-
39
s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER |
40
SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC);
41
42
- if (pkt_size + 4 > dp8393x_rbwc(s) * 2) {
43
+ rx_len = pkt_size + sizeof(checksum);
44
+ if (s->regs[SONIC_DCR] & SONIC_DCR_DW) {
45
+ width = 2;
46
+ padded_len = ((rx_len - 1) | 3) + 1;
47
+ } else {
48
+ width = 1;
49
+ padded_len = ((rx_len - 1) | 1) + 1;
50
+ }
51
+
52
+ if (padded_len > dp8393x_rbwc(s) * 2) {
53
DPRINTF("oversize packet, pkt_size is %d\n", pkt_size);
54
s->regs[SONIC_ISR] |= SONIC_ISR_RBAE;
55
dp8393x_update_irq(s);
56
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
57
s->regs[SONIC_TRBA0] = s->regs[SONIC_CRBA0];
58
59
/* Calculate the ethernet checksum */
60
- checksum = cpu_to_le32(crc32(0, buf, rx_len));
61
+ checksum = cpu_to_le32(crc32(0, buf, pkt_size));
62
63
/* Put packet into RBA */
64
DPRINTF("Receive packet at %08x\n", dp8393x_crba(s));
65
address = dp8393x_crba(s);
66
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
67
- buf, rx_len);
68
- address += rx_len;
69
+ buf, pkt_size);
70
+ address += pkt_size;
71
+
72
+ /* Put frame checksum into RBA */
73
address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED,
74
- &checksum, 4);
75
- address += 4;
76
- rx_len += 4;
77
+ &checksum, sizeof(checksum));
78
+ address += sizeof(checksum);
79
+
80
+ /* Pad short packets to keep pointers aligned */
81
+ if (rx_len < padded_len) {
82
+ size = padded_len - rx_len;
83
+ address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED,
84
+ (uint8_t *)"\xFF\xFF\xFF", size, 1);
85
+ address += size;
86
+ }
87
+
88
s->regs[SONIC_CRBA1] = address >> 16;
89
s->regs[SONIC_CRBA0] = address & 0xffff;
90
available = dp8393x_rbwc(s);
91
- available -= rx_len / 2;
92
+ available -= padded_len >> 1;
93
s->regs[SONIC_RBWC1] = available >> 16;
94
s->regs[SONIC_RBWC0] = available & 0xffff;
95
96
--
97
2.5.0
98
99
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
When the SONIC receives a packet into the last available descriptor, it
4
retains ownership of that descriptor for as long as necessary.
5
6
Section 3.4.7 of the datasheet says,
7
8
When the system appends more descriptors, the SONIC releases ownership
9
of the descriptor after writing 0000h to the RXpkt.in_use field.
10
11
The packet can now be processed by the host, so raise a PKTRX interrupt,
12
just like the normal case.
13
14
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
15
Tested-by: Laurent Vivier <laurent@vivier.eu>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
18
hw/net/dp8393x.c | 10 ++++++++++
19
1 file changed, 10 insertions(+)
20
21
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/net/dp8393x.c
24
+++ b/hw/net/dp8393x.c
25
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
26
return -1;
27
}
28
/* Link has been updated by host */
29
+
30
+ /* Clear in_use */
31
+ size = sizeof(uint16_t) * width;
32
+ address = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width;
33
+ dp8393x_put(s, width, 0, 0);
34
+ address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED,
35
+ (uint8_t *)s->data, size, 1);
36
+
37
+ /* Move to next descriptor */
38
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
39
+ s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
40
}
41
42
/* Save current position */
43
--
44
2.5.0
45
46
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
These operations need to take place regardless of whether or not
4
rx descriptors have been used up (that is, EOL flag was observed).
5
6
The algorithm is now the same for a packet that was withheld as for
7
a packet that was not.
8
9
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
10
Tested-by: Laurent Vivier <laurent@vivier.eu>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
hw/net/dp8393x.c | 12 +++++++-----
14
1 file changed, 7 insertions(+), 5 deletions(-)
15
16
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/net/dp8393x.c
19
+++ b/hw/net/dp8393x.c
20
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
21
/* Move to next descriptor */
22
s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA];
23
s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
24
- s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) | (((s->regs[SONIC_RSC] & 0x00ff) + 1) & 0x00ff);
25
+ }
26
27
- if (s->regs[SONIC_RCR] & SONIC_RCR_LPKT) {
28
- /* Read next RRA */
29
- dp8393x_do_read_rra(s);
30
- }
31
+ s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) |
32
+ ((s->regs[SONIC_RSC] + 1) & 0x00ff);
33
+
34
+ if (s->regs[SONIC_RCR] & SONIC_RCR_LPKT) {
35
+ /* Read next RRA */
36
+ dp8393x_do_read_rra(s);
37
}
38
39
/* Done */
40
--
41
2.5.0
42
43
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
The jazzsonic driver in Linux uses the Silicon Revision register value
4
to probe the chip. The driver fails unless the SR register contains 4.
5
Unfortunately, reading this register in QEMU usually returns 0 because
6
the s->regs[] array gets wiped after a software reset.
7
8
Fixes: bd8f1ebce4 ("net/dp8393x: fix hardware reset")
9
Suggested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
hw/net/dp8393x.c | 2 +-
14
1 file changed, 1 insertion(+), 1 deletion(-)
15
16
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/net/dp8393x.c
19
+++ b/hw/net/dp8393x.c
20
@@ -XXX,XX +XXX,XX @@ static void dp8393x_reset(DeviceState *dev)
21
timer_del(s->watchdog);
22
23
memset(s->regs, 0, sizeof(s->regs));
24
+ s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux/mips */
25
s->regs[SONIC_CR] = SONIC_CR_RST | SONIC_CR_STP | SONIC_CR_RXDIS;
26
s->regs[SONIC_DCR] &= ~(SONIC_DCR_EXBUS | SONIC_DCR_LBR);
27
s->regs[SONIC_RCR] &= ~(SONIC_RCR_LB0 | SONIC_RCR_LB1 | SONIC_RCR_BRD | SONIC_RCR_RNT);
28
@@ -XXX,XX +XXX,XX @@ static void dp8393x_realize(DeviceState *dev, Error **errp)
29
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
30
31
s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s);
32
- s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux */
33
34
memory_region_init_ram(&s->prom, OBJECT(dev),
35
"dp8393x-prom", SONIC_PROM_SIZE, &local_err);
36
--
37
2.5.0
38
39
diff view generated by jsdifflib
Deleted patch
1
From: Finn Thain <fthain@telegraphics.com.au>
2
1
3
Section 3.4.7 of the datasheet explains that,
4
5
The RBE bit in the Interrupt Status register is set when the
6
SONIC finishes using the second to last receive buffer and reads
7
the last RRA descriptor. Actually, the SONIC is not truly out of
8
resources, but gives the system an early warning of an impending
9
out of resources condition.
10
11
RBE does not mean actual receive buffer exhaustion, and reception should
12
not be stopped. This is important because Linux will not check and clear
13
the RBE interrupt until it receives another packet. But that won't
14
happen if can_receive returns false. This bug causes the SONIC to become
15
deaf (until reset).
16
17
Fix this with a new flag to indicate actual receive buffer exhaustion.
18
19
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
20
Tested-by: Laurent Vivier <laurent@vivier.eu>
21
Signed-off-by: Jason Wang <jasowang@redhat.com>
22
---
23
hw/net/dp8393x.c | 35 ++++++++++++++++++++++-------------
24
1 file changed, 22 insertions(+), 13 deletions(-)
25
26
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/net/dp8393x.c
29
+++ b/hw/net/dp8393x.c
30
@@ -XXX,XX +XXX,XX @@ typedef struct dp8393xState {
31
/* Hardware */
32
uint8_t it_shift;
33
bool big_endian;
34
+ bool last_rba_is_full;
35
qemu_irq irq;
36
#ifdef DEBUG_SONIC
37
int irq_level;
38
@@ -XXX,XX +XXX,XX @@ static void dp8393x_do_read_rra(dp8393xState *s)
39
s->regs[SONIC_RRP] = s->regs[SONIC_RSA];
40
}
41
42
- /* Check resource exhaustion */
43
+ /* Warn the host if CRBA now has the last available resource */
44
if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP])
45
{
46
s->regs[SONIC_ISR] |= SONIC_ISR_RBE;
47
dp8393x_update_irq(s);
48
}
49
+
50
+ /* Allow packet reception */
51
+ s->last_rba_is_full = false;
52
}
53
54
static void dp8393x_do_software_reset(dp8393xState *s)
55
@@ -XXX,XX +XXX,XX @@ static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data,
56
dp8393x_do_read_rra(s);
57
}
58
dp8393x_update_irq(s);
59
- if (dp8393x_can_receive(s->nic->ncs)) {
60
- qemu_flush_queued_packets(qemu_get_queue(s->nic));
61
- }
62
break;
63
/* The guest is required to store aligned pointers here */
64
case SONIC_RSA:
65
@@ -XXX,XX +XXX,XX @@ static int dp8393x_can_receive(NetClientState *nc)
66
67
if (!(s->regs[SONIC_CR] & SONIC_CR_RXEN))
68
return 0;
69
- if (s->regs[SONIC_ISR] & SONIC_ISR_RBE)
70
- return 0;
71
return 1;
72
}
73
74
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
75
s->regs[SONIC_RCR] &= ~(SONIC_RCR_PRX | SONIC_RCR_LBK | SONIC_RCR_FAER |
76
SONIC_RCR_CRCR | SONIC_RCR_LPKT | SONIC_RCR_BC | SONIC_RCR_MC);
77
78
+ if (s->last_rba_is_full) {
79
+ return pkt_size;
80
+ }
81
+
82
rx_len = pkt_size + sizeof(checksum);
83
if (s->regs[SONIC_DCR] & SONIC_DCR_DW) {
84
width = 2;
85
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
86
DPRINTF("oversize packet, pkt_size is %d\n", pkt_size);
87
s->regs[SONIC_ISR] |= SONIC_ISR_RBAE;
88
dp8393x_update_irq(s);
89
- dp8393x_do_read_rra(s);
90
- return pkt_size;
91
+ s->regs[SONIC_RCR] |= SONIC_RCR_LPKT;
92
+ goto done;
93
}
94
95
packet_type = dp8393x_receive_filter(s, buf, pkt_size);
96
@@ -XXX,XX +XXX,XX @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf,
97
s->regs[SONIC_ISR] |= SONIC_ISR_PKTRX;
98
}
99
100
+ dp8393x_update_irq(s);
101
+
102
s->regs[SONIC_RSC] = (s->regs[SONIC_RSC] & 0xff00) |
103
((s->regs[SONIC_RSC] + 1) & 0x00ff);
104
105
+done:
106
+
107
if (s->regs[SONIC_RCR] & SONIC_RCR_LPKT) {
108
- /* Read next RRA */
109
- dp8393x_do_read_rra(s);
110
+ if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP]) {
111
+ /* Stop packet reception */
112
+ s->last_rba_is_full = true;
113
+ } else {
114
+ /* Read next resource */
115
+ dp8393x_do_read_rra(s);
116
+ }
117
}
118
119
- /* Done */
120
- dp8393x_update_irq(s);
121
-
122
return pkt_size;
123
}
124
125
--
126
2.5.0
127
128
diff view generated by jsdifflib
Deleted patch
1
From: Yuri Benditovich <yuri.benditovich@daynix.com>
2
1
3
https://bugzilla.redhat.com/show_bug.cgi?id=1787142
4
The emulation issues hw_error if PSRCTL register
5
is written, for example, with zero value.
6
Such configuration does not present any problem when
7
DTYP bits of RCTL register define legacy format of
8
transfer descriptors. Current commit discards check
9
for BSIZE0 and BSIZE1 when legacy mode is used.
10
11
Acked-by: Dmitry Fleytman <dmitry.fleytman@gmail.com>
12
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
hw/net/e1000e_core.c | 13 ++++++++-----
16
1 file changed, 8 insertions(+), 5 deletions(-)
17
18
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/net/e1000e_core.c
21
+++ b/hw/net/e1000e_core.c
22
@@ -XXX,XX +XXX,XX @@ e1000e_set_eitr(E1000ECore *core, int index, uint32_t val)
23
static void
24
e1000e_set_psrctl(E1000ECore *core, int index, uint32_t val)
25
{
26
- if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) {
27
- hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero");
28
- }
29
+ if (core->mac[RCTL] & E1000_RCTL_DTYP_MASK) {
30
+
31
+ if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) {
32
+ hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero");
33
+ }
34
35
- if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) {
36
- hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero");
37
+ if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) {
38
+ hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero");
39
+ }
40
}
41
42
core->mac[PSRCTL] = val;
43
--
44
2.5.0
45
46
diff view generated by jsdifflib
Deleted patch
1
From: Yuri Benditovich <yuri.benditovich@daynix.com>
2
1
3
When requested to calculate the hash for TCPV6 packet,
4
ignore overrides of source and destination addresses
5
in extension headers.
6
Use these overrides when new hash type NetPktRssIpV6TcpEx
7
is requested.
8
Use this type in e1000e hash calculation for IPv6 TCP, which
9
should take into account overrides of the addresses.
10
11
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
12
Acked-by: Dmitry Fleytman <dmitry.fleytman@gmail.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
hw/net/e1000e_core.c | 2 +-
16
hw/net/net_rx_pkt.c | 2 +-
17
2 files changed, 2 insertions(+), 2 deletions(-)
18
19
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/net/e1000e_core.c
22
+++ b/hw/net/e1000e_core.c
23
@@ -XXX,XX +XXX,XX @@ e1000e_rss_calc_hash(E1000ECore *core,
24
type = NetPktRssIpV4Tcp;
25
break;
26
case E1000_MRQ_RSS_TYPE_IPV6TCP:
27
- type = NetPktRssIpV6Tcp;
28
+ type = NetPktRssIpV6TcpEx;
29
break;
30
case E1000_MRQ_RSS_TYPE_IPV6:
31
type = NetPktRssIpV6;
32
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/hw/net/net_rx_pkt.c
35
+++ b/hw/net/net_rx_pkt.c
36
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
37
assert(pkt->isip6);
38
assert(pkt->istcp);
39
trace_net_rx_pkt_rss_ip6_tcp();
40
- _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
41
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
42
_net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
43
break;
44
case NetPktRssIpV6:
45
--
46
2.5.0
47
48
diff view generated by jsdifflib
Deleted patch
1
From: Bin Meng <bmeng.cn@gmail.com>
2
1
3
When CADENCE_GEM_ERR_DEBUG is turned on, there are several
4
compilation errors in DB_PRINT(). Fix them.
5
6
While we are here, update to use appropriate modifiers in
7
the same DB_PRINT() call.
8
9
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
10
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
hw/net/cadence_gem.c | 11 ++++++-----
14
1 file changed, 6 insertions(+), 5 deletions(-)
15
16
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/net/cadence_gem.c
19
+++ b/hw/net/cadence_gem.c
20
@@ -XXX,XX +XXX,XX @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size)
21
return -1;
22
}
23
24
- DB_PRINT("copy %d bytes to 0x%x\n", MIN(bytes_to_copy, rxbufsize),
25
- rx_desc_get_buffer(s->rx_desc[q]));
26
+ DB_PRINT("copy %u bytes to 0x%" PRIx64 "\n",
27
+ MIN(bytes_to_copy, rxbufsize),
28
+ rx_desc_get_buffer(s, s->rx_desc[q]));
29
30
/* Copy packet data to emulated DMA buffer */
31
address_space_write(&s->dma_as, rx_desc_get_buffer(s, s->rx_desc[q]) +
32
@@ -XXX,XX +XXX,XX @@ static void gem_transmit(CadenceGEMState *s)
33
34
if (tx_desc_get_length(desc) > sizeof(tx_packet) -
35
(p - tx_packet)) {
36
- DB_PRINT("TX descriptor @ 0x%x too large: size 0x%x space " \
37
- "0x%x\n", (unsigned)packet_desc_addr,
38
- (unsigned)tx_desc_get_length(desc),
39
+ DB_PRINT("TX descriptor @ 0x%" HWADDR_PRIx \
40
+ " too large: size 0x%x space 0x%zx\n",
41
+ packet_desc_addr, tx_desc_get_length(desc),
42
sizeof(tx_packet) - (p - tx_packet));
43
break;
44
}
45
--
46
2.5.0
47
48
diff view generated by jsdifflib
Deleted patch
1
From: Lukas Straub <lukasstraub2@web.de>
2
1
3
After failover the Secondary side of replication shouldn't change state, because
4
it now functions as our primary disk.
5
6
In replication_start, replication_do_checkpoint, replication_stop, ignore
7
the request if current state is BLOCK_REPLICATION_DONE (successful failover) or
8
BLOCK_REPLICATION_FAILOVER (failover in progress, i.e. currently merging active
9
and hidden images into the base image).
10
11
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
12
Reviewed-by: Zhang Chen <chen.zhang@intel.com>
13
Acked-by: Max Reitz <mreitz@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
block/replication.c | 35 ++++++++++++++++++++++++++++++++++-
17
1 file changed, 34 insertions(+), 1 deletion(-)
18
19
diff --git a/block/replication.c b/block/replication.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/replication.c
22
+++ b/block/replication.c
23
@@ -XXX,XX +XXX,XX @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
24
aio_context_acquire(aio_context);
25
s = bs->opaque;
26
27
+ if (s->stage == BLOCK_REPLICATION_DONE ||
28
+ s->stage == BLOCK_REPLICATION_FAILOVER) {
29
+ /*
30
+ * This case happens when a secondary is promoted to primary.
31
+ * Ignore the request because the secondary side of replication
32
+ * doesn't have to do anything anymore.
33
+ */
34
+ aio_context_release(aio_context);
35
+ return;
36
+ }
37
+
38
if (s->stage != BLOCK_REPLICATION_NONE) {
39
error_setg(errp, "Block replication is running or done");
40
aio_context_release(aio_context);
41
@@ -XXX,XX +XXX,XX @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
42
aio_context_acquire(aio_context);
43
s = bs->opaque;
44
45
+ if (s->stage == BLOCK_REPLICATION_DONE ||
46
+ s->stage == BLOCK_REPLICATION_FAILOVER) {
47
+ /*
48
+ * This case happens when a secondary was promoted to primary.
49
+ * Ignore the request because the secondary side of replication
50
+ * doesn't have to do anything anymore.
51
+ */
52
+ aio_context_release(aio_context);
53
+ return;
54
+ }
55
+
56
if (s->mode == REPLICATION_MODE_SECONDARY) {
57
secondary_do_checkpoint(s, errp);
58
}
59
@@ -XXX,XX +XXX,XX @@ static void replication_get_error(ReplicationState *rs, Error **errp)
60
aio_context_acquire(aio_context);
61
s = bs->opaque;
62
63
- if (s->stage != BLOCK_REPLICATION_RUNNING) {
64
+ if (s->stage == BLOCK_REPLICATION_NONE) {
65
error_setg(errp, "Block replication is not running");
66
aio_context_release(aio_context);
67
return;
68
@@ -XXX,XX +XXX,XX @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
69
aio_context_acquire(aio_context);
70
s = bs->opaque;
71
72
+ if (s->stage == BLOCK_REPLICATION_DONE ||
73
+ s->stage == BLOCK_REPLICATION_FAILOVER) {
74
+ /*
75
+ * This case happens when a secondary was promoted to primary.
76
+ * Ignore the request because the secondary side of replication
77
+ * doesn't have to do anything anymore.
78
+ */
79
+ aio_context_release(aio_context);
80
+ return;
81
+ }
82
+
83
if (s->stage != BLOCK_REPLICATION_RUNNING) {
84
error_setg(errp, "Block replication is not running");
85
aio_context_release(aio_context);
86
--
87
2.5.0
88
89
diff view generated by jsdifflib
Deleted patch
1
From: Lukas Straub <lukasstraub2@web.de>
2
1
3
This simulates the case that happens when we resume COLO after failover.
4
5
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
8
tests/test-replication.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++
9
1 file changed, 52 insertions(+)
10
11
diff --git a/tests/test-replication.c b/tests/test-replication.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tests/test-replication.c
14
+++ b/tests/test-replication.c
15
@@ -XXX,XX +XXX,XX @@ static void test_secondary_stop(void)
16
teardown_secondary();
17
}
18
19
+static void test_secondary_continuous_replication(void)
20
+{
21
+ BlockBackend *top_blk, *local_blk;
22
+ Error *local_err = NULL;
23
+
24
+ top_blk = start_secondary();
25
+ replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
26
+ g_assert(!local_err);
27
+
28
+ /* write 0x22 to s_local_disk (IMG_SIZE / 2, IMG_SIZE) */
29
+ local_blk = blk_by_name(S_LOCAL_DISK_ID);
30
+ test_blk_write(local_blk, 0x22, IMG_SIZE / 2, IMG_SIZE / 2, false);
31
+
32
+ /* replication will backup s_local_disk to s_hidden_disk */
33
+ test_blk_read(top_blk, 0x11, IMG_SIZE / 2,
34
+ IMG_SIZE / 2, 0, IMG_SIZE, false);
35
+
36
+ /* write 0x33 to s_active_disk (0, IMG_SIZE / 2) */
37
+ test_blk_write(top_blk, 0x33, 0, IMG_SIZE / 2, false);
38
+
39
+ /* do failover (active commit) */
40
+ replication_stop_all(true, &local_err);
41
+ g_assert(!local_err);
42
+
43
+ /* it should ignore all requests from now on */
44
+
45
+ /* start after failover */
46
+ replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
47
+ g_assert(!local_err);
48
+
49
+ /* checkpoint */
50
+ replication_do_checkpoint_all(&local_err);
51
+ g_assert(!local_err);
52
+
53
+ /* stop */
54
+ replication_stop_all(true, &local_err);
55
+ g_assert(!local_err);
56
+
57
+ /* read from s_local_disk (0, IMG_SIZE / 2) */
58
+ test_blk_read(top_blk, 0x33, 0, IMG_SIZE / 2,
59
+ 0, IMG_SIZE / 2, false);
60
+
61
+
62
+ /* read from s_local_disk (IMG_SIZE / 2, IMG_SIZE) */
63
+ test_blk_read(top_blk, 0x22, IMG_SIZE / 2,
64
+ IMG_SIZE / 2, 0, IMG_SIZE, false);
65
+
66
+ teardown_secondary();
67
+}
68
+
69
static void test_secondary_do_checkpoint(void)
70
{
71
BlockBackend *top_blk, *local_blk;
72
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
73
g_test_add_func("/replication/secondary/write", test_secondary_write);
74
g_test_add_func("/replication/secondary/start", test_secondary_start);
75
g_test_add_func("/replication/secondary/stop", test_secondary_stop);
76
+ g_test_add_func("/replication/secondary/continuous_replication",
77
+ test_secondary_continuous_replication);
78
g_test_add_func("/replication/secondary/do_checkpoint",
79
test_secondary_do_checkpoint);
80
g_test_add_func("/replication/secondary/get_error_all",
81
--
82
2.5.0
83
84
diff view generated by jsdifflib
Deleted patch
1
From: Lukas Straub <lukasstraub2@web.de>
2
1
3
To switch the Secondary to Primary, we need to insert new filters
4
before the filter-rewriter.
5
6
Add the options insert= and position= to be able to insert filters
7
anywhere in the filter list.
8
9
position should be "head" or "tail" to insert at the head or
10
tail of the filter list or it should be "id=<id>" to specify
11
the id of another filter.
12
insert should be either "before" or "behind" to specify where to
13
insert the new filter relative to the one specified with position.
14
15
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
16
Reviewed-by: Zhang Chen <chen.zhang@intel.com>
17
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
---
19
include/net/filter.h | 2 ++
20
net/filter.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++-
21
qemu-options.hx | 31 +++++++++++++++---
22
3 files changed, 119 insertions(+), 6 deletions(-)
23
24
diff --git a/include/net/filter.h b/include/net/filter.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/net/filter.h
27
+++ b/include/net/filter.h
28
@@ -XXX,XX +XXX,XX @@ struct NetFilterState {
29
NetClientState *netdev;
30
NetFilterDirection direction;
31
bool on;
32
+ char *position;
33
+ bool insert_before_flag;
34
QTAILQ_ENTRY(NetFilterState) next;
35
};
36
37
diff --git a/net/filter.c b/net/filter.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/net/filter.c
40
+++ b/net/filter.c
41
@@ -XXX,XX +XXX,XX @@ static void netfilter_set_status(Object *obj, const char *str, Error **errp)
42
}
43
}
44
45
+static char *netfilter_get_position(Object *obj, Error **errp)
46
+{
47
+ NetFilterState *nf = NETFILTER(obj);
48
+
49
+ return g_strdup(nf->position);
50
+}
51
+
52
+static void netfilter_set_position(Object *obj, const char *str, Error **errp)
53
+{
54
+ NetFilterState *nf = NETFILTER(obj);
55
+
56
+ nf->position = g_strdup(str);
57
+}
58
+
59
+static char *netfilter_get_insert(Object *obj, Error **errp)
60
+{
61
+ NetFilterState *nf = NETFILTER(obj);
62
+
63
+ return nf->insert_before_flag ? g_strdup("before") : g_strdup("behind");
64
+}
65
+
66
+static void netfilter_set_insert(Object *obj, const char *str, Error **errp)
67
+{
68
+ NetFilterState *nf = NETFILTER(obj);
69
+
70
+ if (strcmp(str, "before") && strcmp(str, "behind")) {
71
+ error_setg(errp, "Invalid value for netfilter insert, "
72
+ "should be 'before' or 'behind'");
73
+ return;
74
+ }
75
+
76
+ nf->insert_before_flag = !strcmp(str, "before");
77
+}
78
+
79
static void netfilter_init(Object *obj)
80
{
81
NetFilterState *nf = NETFILTER(obj);
82
83
nf->on = true;
84
+ nf->insert_before_flag = false;
85
+ nf->position = g_strdup("tail");
86
87
object_property_add_str(obj, "netdev",
88
netfilter_get_netdev_id, netfilter_set_netdev_id,
89
@@ -XXX,XX +XXX,XX @@ static void netfilter_init(Object *obj)
90
object_property_add_str(obj, "status",
91
netfilter_get_status, netfilter_set_status,
92
NULL);
93
+ object_property_add_str(obj, "position",
94
+ netfilter_get_position, netfilter_set_position,
95
+ NULL);
96
+ object_property_add_str(obj, "insert",
97
+ netfilter_get_insert, netfilter_set_insert,
98
+ NULL);
99
}
100
101
static void netfilter_complete(UserCreatable *uc, Error **errp)
102
{
103
NetFilterState *nf = NETFILTER(uc);
104
+ NetFilterState *position = NULL;
105
NetClientState *ncs[MAX_QUEUE_NUM];
106
NetFilterClass *nfc = NETFILTER_GET_CLASS(uc);
107
int queues;
108
@@ -XXX,XX +XXX,XX @@ static void netfilter_complete(UserCreatable *uc, Error **errp)
109
return;
110
}
111
112
+ if (strcmp(nf->position, "head") && strcmp(nf->position, "tail")) {
113
+ Object *container;
114
+ Object *obj;
115
+ char *position_id;
116
+
117
+ if (!g_str_has_prefix(nf->position, "id=")) {
118
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "position",
119
+ "'head', 'tail' or 'id=<id>'");
120
+ return;
121
+ }
122
+
123
+ /* get the id from the string */
124
+ position_id = g_strndup(nf->position + 3, strlen(nf->position) - 3);
125
+
126
+ /* Search for the position to insert before/behind */
127
+ container = object_get_objects_root();
128
+ obj = object_resolve_path_component(container, position_id);
129
+ if (!obj) {
130
+ error_setg(errp, "filter '%s' not found", position_id);
131
+ g_free(position_id);
132
+ return;
133
+ }
134
+
135
+ position = NETFILTER(obj);
136
+
137
+ if (position->netdev != ncs[0]) {
138
+ error_setg(errp, "filter '%s' belongs to a different netdev",
139
+ position_id);
140
+ g_free(position_id);
141
+ return;
142
+ }
143
+
144
+ g_free(position_id);
145
+ }
146
+
147
nf->netdev = ncs[0];
148
149
if (nfc->setup) {
150
@@ -XXX,XX +XXX,XX @@ static void netfilter_complete(UserCreatable *uc, Error **errp)
151
return;
152
}
153
}
154
- QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next);
155
+
156
+ if (position) {
157
+ if (nf->insert_before_flag) {
158
+ QTAILQ_INSERT_BEFORE(position, nf, next);
159
+ } else {
160
+ QTAILQ_INSERT_AFTER(&nf->netdev->filters, position, nf, next);
161
+ }
162
+ } else if (!strcmp(nf->position, "head")) {
163
+ QTAILQ_INSERT_HEAD(&nf->netdev->filters, nf, next);
164
+ } else if (!strcmp(nf->position, "tail")) {
165
+ QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next);
166
+ }
167
}
168
169
static void netfilter_finalize(Object *obj)
170
@@ -XXX,XX +XXX,XX @@ static void netfilter_finalize(Object *obj)
171
QTAILQ_REMOVE(&nf->netdev->filters, nf, next);
172
}
173
g_free(nf->netdev_id);
174
+ g_free(nf->position);
175
}
176
177
static void default_handle_event(NetFilterState *nf, int event, Error **errp)
178
diff --git a/qemu-options.hx b/qemu-options.hx
179
index XXXXXXX..XXXXXXX 100644
180
--- a/qemu-options.hx
181
+++ b/qemu-options.hx
182
@@ -XXX,XX +XXX,XX @@ applications, they can do this through this parameter. Its format is
183
a gnutls priority string as described at
184
@url{https://gnutls.org/manual/html_node/Priority-Strings.html}.
185
186
-@item -object filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}][,status=@var{on|off}]
187
+@item -object filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}][,status=@var{on|off}][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
188
189
Interval @var{t} can't be 0, this filter batches the packet delivery: all
190
packets arriving in a given interval on netdev @var{netdevid} are delayed
191
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
192
@option{tx}: the filter is attached to the transmit queue of the netdev,
193
where it will receive packets sent by the netdev.
194
195
-@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
196
+position @var{head|tail|id=<id>} is an option to specify where the
197
+filter should be inserted in the filter list. It can be applied to any
198
+netfilter.
199
+
200
+@option{head}: the filter is inserted at the head of the filter
201
+ list, before any existing filters.
202
+
203
+@option{tail}: the filter is inserted at the tail of the filter
204
+ list, behind any existing filters (default).
205
+
206
+@option{id=<id>}: the filter is inserted before or behind the filter
207
+ specified by <id>, see the insert option below.
208
+
209
+insert @var{behind|before} is an option to specify where to insert the
210
+new filter relative to the one specified with position=id=<id>. It can
211
+be applied to any netfilter.
212
+
213
+@option{before}: insert before the specified filter.
214
+
215
+@option{behind}: insert behind the specified filter (default).
216
+
217
+@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
218
219
filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
220
221
-@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
222
+@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
223
224
filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev
225
@var{chardevid},and redirect indev's packet to filter.if it has the vnet_hdr_support flag,
226
@@ -XXX,XX +XXX,XX @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
227
be the same. we can just use indev or outdev, but at least one of indev or outdev
228
need to be specified.
229
230
-@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support]
231
+@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
232
233
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
234
secondary from primary to keep secondary tcp connection,and rewrite
235
@@ -XXX,XX +XXX,XX @@ colo secondary:
236
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
237
-object filter-rewriter,id=rew0,netdev=hn0,queue=all
238
239
-@item -object filter-dump,id=@var{id},netdev=@var{dev}[,file=@var{filename}][,maxlen=@var{len}]
240
+@item -object filter-dump,id=@var{id},netdev=@var{dev}[,file=@var{filename}][,maxlen=@var{len}][,position=@var{head|tail|id=<id>}][,insert=@var{behind|before}]
241
242
Dump the network traffic on netdev @var{dev} to the file specified by
243
@var{filename}. At most @var{len} bytes (64k by default) per packet are stored.
244
--
245
2.5.0
246
247
diff view generated by jsdifflib
Deleted patch
1
From: Lukas Straub <lukasstraub2@web.de>
2
1
3
Document the qemu command-line and qmp commands for continuous replication
4
5
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
8
docs/COLO-FT.txt | 224 +++++++++++++++++++++++++++++++++------------
9
docs/block-replication.txt | 28 ++++--
10
2 files changed, 184 insertions(+), 68 deletions(-)
11
12
diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt
13
index XXXXXXX..XXXXXXX 100644
14
--- a/docs/COLO-FT.txt
15
+++ b/docs/COLO-FT.txt
16
@@ -XXX,XX +XXX,XX @@ The diagram just shows the main qmp command, you can get the detail
17
in test procedure.
18
19
== Test procedure ==
20
-1. Startup qemu
21
-Primary:
22
-# qemu-system-x86_64 -accel kvm -m 2048 -smp 2 -qmp stdio -name primary \
23
- -device piix3-usb-uhci -vnc :7 \
24
- -device usb-tablet -netdev tap,id=hn0,vhost=off \
25
- -device virtio-net-pci,id=net-pci0,netdev=hn0 \
26
- -drive if=virtio,id=primary-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
27
- children.0.file.filename=1.raw,\
28
- children.0.driver=raw -S
29
-Secondary:
30
-# qemu-system-x86_64 -accel kvm -m 2048 -smp 2 -qmp stdio -name secondary \
31
- -device piix3-usb-uhci -vnc :7 \
32
- -device usb-tablet -netdev tap,id=hn0,vhost=off \
33
- -device virtio-net-pci,id=net-pci0,netdev=hn0 \
34
- -drive if=none,id=secondary-disk0,file.filename=1.raw,driver=raw,node-name=node0 \
35
- -drive if=virtio,id=active-disk0,driver=replication,mode=secondary,\
36
- file.driver=qcow2,top-id=active-disk0,\
37
- file.file.filename=/mnt/ramfs/active_disk.img,\
38
- file.backing.driver=qcow2,\
39
- file.backing.file.filename=/mnt/ramfs/hidden_disk.img,\
40
- file.backing.backing=secondary-disk0 \
41
- -incoming tcp:0:8888
42
-
43
-2. On Secondary VM's QEMU monitor, issue command
44
+Note: Here we are running both instances on the same host for testing,
45
+change the IP Addresses if you want to run it on two hosts. Initially
46
+127.0.0.1 is the Primary Host and 127.0.0.2 is the Secondary Host.
47
+
48
+== Startup qemu ==
49
+1. Primary:
50
+Note: Initially, $imagefolder/primary.qcow2 needs to be copied to all hosts.
51
+You don't need to change any IP's here, because 0.0.0.0 listens on any
52
+interface. The chardev's with 127.0.0.1 IP's loopback to the local qemu
53
+instance.
54
+
55
+# imagefolder="/mnt/vms/colo-test-primary"
56
+
57
+# qemu-system-x86_64 -enable-kvm -cpu qemu64,+kvmclock -m 512 -smp 1 -qmp stdio \
58
+ -device piix3-usb-uhci -device usb-tablet -name primary \
59
+ -netdev tap,id=hn0,vhost=off,helper=/usr/lib/qemu/qemu-bridge-helper \
60
+ -device rtl8139,id=e0,netdev=hn0 \
61
+ -chardev socket,id=mirror0,host=0.0.0.0,port=9003,server,nowait \
62
+ -chardev socket,id=compare1,host=0.0.0.0,port=9004,server,wait \
63
+ -chardev socket,id=compare0,host=127.0.0.1,port=9001,server,nowait \
64
+ -chardev socket,id=compare0-0,host=127.0.0.1,port=9001 \
65
+ -chardev socket,id=compare_out,host=127.0.0.1,port=9005,server,nowait \
66
+ -chardev socket,id=compare_out0,host=127.0.0.1,port=9005 \
67
+ -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 \
68
+ -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out \
69
+ -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 \
70
+ -object iothread,id=iothread1 \
71
+ -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,\
72
+outdev=compare_out0,iothread=iothread1 \
73
+ -drive if=ide,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
74
+children.0.file.filename=$imagefolder/primary.qcow2,children.0.driver=qcow2 -S
75
+
76
+2. Secondary:
77
+Note: Active and hidden images need to be created only once and the
78
+size should be the same as primary.qcow2. Again, you don't need to change
79
+any IP's here, except for the $primary_ip variable.
80
+
81
+# imagefolder="/mnt/vms/colo-test-secondary"
82
+# primary_ip=127.0.0.1
83
+
84
+# qemu-img create -f qcow2 $imagefolder/secondary-active.qcow2 10G
85
+
86
+# qemu-img create -f qcow2 $imagefolder/secondary-hidden.qcow2 10G
87
+
88
+# qemu-system-x86_64 -enable-kvm -cpu qemu64,+kvmclock -m 512 -smp 1 -qmp stdio \
89
+ -device piix3-usb-uhci -device usb-tablet -name secondary \
90
+ -netdev tap,id=hn0,vhost=off,helper=/usr/lib/qemu/qemu-bridge-helper \
91
+ -device rtl8139,id=e0,netdev=hn0 \
92
+ -chardev socket,id=red0,host=$primary_ip,port=9003,reconnect=1 \
93
+ -chardev socket,id=red1,host=$primary_ip,port=9004,reconnect=1 \
94
+ -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 \
95
+ -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 \
96
+ -object filter-rewriter,id=rew0,netdev=hn0,queue=all \
97
+ -drive if=none,id=parent0,file.filename=$imagefolder/primary.qcow2,driver=qcow2 \
98
+ -drive if=none,id=childs0,driver=replication,mode=secondary,file.driver=qcow2,\
99
+top-id=colo-disk0,file.file.filename=$imagefolder/secondary-active.qcow2,\
100
+file.backing.driver=qcow2,file.backing.file.filename=$imagefolder/secondary-hidden.qcow2,\
101
+file.backing.backing=parent0 \
102
+ -drive if=ide,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
103
+children.0=childs0 \
104
+ -incoming tcp:0.0.0.0:9998
105
+
106
+
107
+3. On Secondary VM's QEMU monitor, issue command
108
{'execute':'qmp_capabilities'}
109
-{ 'execute': 'nbd-server-start',
110
- 'arguments': {'addr': {'type': 'inet', 'data': {'host': 'xx.xx.xx.xx', 'port': '8889'} } }
111
-}
112
-{'execute': 'nbd-server-add', 'arguments': {'device': 'secondary-disk0', 'writable': true } }
113
+{'execute': 'nbd-server-start', 'arguments': {'addr': {'type': 'inet', 'data': {'host': '0.0.0.0', 'port': '9999'} } } }
114
+{'execute': 'nbd-server-add', 'arguments': {'device': 'parent0', 'writable': true } }
115
116
Note:
117
a. The qmp command nbd-server-start and nbd-server-add must be run
118
before running the qmp command migrate on primary QEMU
119
b. Active disk, hidden disk and nbd target's length should be the
120
same.
121
- c. It is better to put active disk and hidden disk in ramdisk.
122
+ c. It is better to put active disk and hidden disk in ramdisk. They
123
+ will be merged into the parent disk on failover.
124
125
-3. On Primary VM's QEMU monitor, issue command:
126
+4. On Primary VM's QEMU monitor, issue command:
127
{'execute':'qmp_capabilities'}
128
-{ 'execute': 'human-monitor-command',
129
- 'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xx.xx.xx.xx,file.port=8889,file.export=secondary-disk0,node-name=nbd_client0'}}
130
-{ 'execute':'x-blockdev-change', 'arguments':{'parent': 'primary-disk0', 'node': 'nbd_client0' } }
131
-{ 'execute': 'migrate-set-capabilities',
132
- 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
133
-{ 'execute': 'migrate', 'arguments': {'uri': 'tcp:xx.xx.xx.xx:8888' } }
134
+{'execute': 'human-monitor-command', 'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}}
135
+{'execute': 'x-blockdev-change', 'arguments':{'parent': 'colo-disk0', 'node': 'replication0' } }
136
+{'execute': 'migrate-set-capabilities', 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
137
+{'execute': 'migrate', 'arguments': {'uri': 'tcp:127.0.0.2:9998' } }
138
139
Note:
140
a. There should be only one NBD Client for each primary disk.
141
- b. xx.xx.xx.xx is the secondary physical machine's hostname or IP
142
- c. The qmp command line must be run after running qmp command line in
143
+ b. The qmp command line must be run after running qmp command line in
144
secondary qemu.
145
146
-4. After the above steps, you will see, whenever you make changes to PVM, SVM will be synced.
147
+5. After the above steps, you will see, whenever you make changes to PVM, SVM will be synced.
148
You can issue command '{ "execute": "migrate-set-parameters" , "arguments":{ "x-checkpoint-delay": 2000 } }'
149
-to change the checkpoint period time
150
+to change the idle checkpoint period time
151
+
152
+6. Failover test
153
+You can kill one of the VMs and Failover on the surviving VM:
154
+
155
+If you killed the Secondary, then follow "Primary Failover". After that,
156
+if you want to resume the replication, follow "Primary resume replication"
157
+
158
+If you killed the Primary, then follow "Secondary Failover". After that,
159
+if you want to resume the replication, follow "Secondary resume replication"
160
+
161
+== Primary Failover ==
162
+The Secondary died, resume on the Primary
163
+
164
+{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'child': 'children.1'} }
165
+{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_del replication0' } }
166
+{'execute': 'object-del', 'arguments':{ 'id': 'comp0' } }
167
+{'execute': 'object-del', 'arguments':{ 'id': 'iothread1' } }
168
+{'execute': 'object-del', 'arguments':{ 'id': 'm0' } }
169
+{'execute': 'object-del', 'arguments':{ 'id': 'redire0' } }
170
+{'execute': 'object-del', 'arguments':{ 'id': 'redire1' } }
171
+{'execute': 'x-colo-lost-heartbeat' }
172
+
173
+== Secondary Failover ==
174
+The Primary died, resume on the Secondary and prepare to become the new Primary
175
+
176
+{'execute': 'nbd-server-stop'}
177
+{'execute': 'x-colo-lost-heartbeat'}
178
+
179
+{'execute': 'object-del', 'arguments':{ 'id': 'f2' } }
180
+{'execute': 'object-del', 'arguments':{ 'id': 'f1' } }
181
+{'execute': 'chardev-remove', 'arguments':{ 'id': 'red1' } }
182
+{'execute': 'chardev-remove', 'arguments':{ 'id': 'red0' } }
183
+
184
+{'execute': 'chardev-add', 'arguments':{ 'id': 'mirror0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9003' } }, 'server': true } } } }
185
+{'execute': 'chardev-add', 'arguments':{ 'id': 'compare1', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9004' } }, 'server': true } } } }
186
+{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': true } } } }
187
+{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0-0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': false } } } }
188
+{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': true } } } }
189
+{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': false } } } }
190
+
191
+== Primary resume replication ==
192
+Resume replication after new Secondary is up.
193
+
194
+Start the new Secondary (Steps 2 and 3 above), then on the Primary:
195
+{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.2:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} }
196
+
197
+Wait until disk is synced, then:
198
+{'execute': 'stop'}
199
+{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync'} }
200
+
201
+{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}}
202
+{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } }
203
+
204
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } }
205
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } }
206
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } }
207
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } }
208
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } }
209
+
210
+{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
211
+{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.2:9998' } }
212
+
213
+Note:
214
+If this Primary previously was a Secondary, then we need to insert the
215
+filters before the filter-rewriter by using the
216
+"'insert': 'before', 'position': 'id=rew0'" Options. See below.
217
+
218
+== Secondary resume replication ==
219
+Become Primary and resume replication after new Secondary is up. Note
220
+that now 127.0.0.1 is the Secondary and 127.0.0.2 is the Primary.
221
+
222
+Start the new Secondary (Steps 2 and 3 above, but with primary_ip=127.0.0.2),
223
+then on the old Secondary:
224
+{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.1:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} }
225
+
226
+Wait until disk is synced, then:
227
+{'execute': 'stop'}
228
+{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync' } }
229
230
-5. Failover test
231
-You can kill Primary VM and run 'x_colo_lost_heartbeat' in Secondary VM's
232
-monitor at the same time, then SVM will failover and client will not detect this
233
-change.
234
+{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.1,file.port=9999,file.export=parent0,node-name=replication0'}}
235
+{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } }
236
237
-Before issuing '{ "execute": "x-colo-lost-heartbeat" }' command, we have to
238
-issue block related command to stop block replication.
239
-Primary:
240
- Remove the nbd child from the quorum:
241
- { 'execute': 'x-blockdev-change', 'arguments': {'parent': 'colo-disk0', 'child': 'children.1'}}
242
- { 'execute': 'human-monitor-command','arguments': {'command-line': 'drive_del blk-buddy0'}}
243
- Note: there is no qmp command to remove the blockdev now
244
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } }
245
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } }
246
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } }
247
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } }
248
+{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } }
249
250
-Secondary:
251
- The primary host is down, so we should do the following thing:
252
- { 'execute': 'nbd-server-stop' }
253
+{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
254
+{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.1:9998' } }
255
256
== TODO ==
257
-1. Support continuous VM replication.
258
-2. Support shared storage.
259
-3. Develop the heartbeat part.
260
-4. Reduce checkpoint VM’s downtime while doing checkpoint.
261
+1. Support shared storage.
262
+2. Develop the heartbeat part.
263
+3. Reduce checkpoint VM’s downtime while doing checkpoint.
264
diff --git a/docs/block-replication.txt b/docs/block-replication.txt
265
index XXXXXXX..XXXXXXX 100644
266
--- a/docs/block-replication.txt
267
+++ b/docs/block-replication.txt
268
@@ -XXX,XX +XXX,XX @@ blocks that are already in QEMU.
269
^ || .----------
270
| || | Secondary
271
1 Quorum || '----------
272
- / \ ||
273
- / \ ||
274
- Primary 2 filter
275
- disk ^ virtio-blk
276
- | ^
277
- 3 NBD -------> 3 NBD |
278
+ / \ || virtio-blk
279
+ / \ || ^
280
+ Primary 2 filter |
281
+ disk ^ 7 Quorum
282
+ | /
283
+ 3 NBD -------> 3 NBD /
284
client || server 2 filter
285
|| ^ ^
286
--------. || | |
287
@@ -XXX,XX +XXX,XX @@ any state that would otherwise be lost by the speculative write-through
288
of the NBD server into the secondary disk. So before block replication,
289
the primary disk and secondary disk should contain the same data.
290
291
+7) The secondary also has a quorum node, so after secondary failover it
292
+can become the new primary and continue replication.
293
+
294
+
295
== Failure Handling ==
296
There are 7 internal errors when block replication is running:
297
1. I/O error on primary disk
298
@@ -XXX,XX +XXX,XX @@ Primary:
299
leading whitespace.
300
5. The qmp command line must be run after running qmp command line in
301
secondary qemu.
302
- 6. After failover we need remove children.1 (replication driver).
303
+ 6. After primary failover we need to remove children.1 (replication driver).
304
305
Secondary:
306
-drive if=none,driver=raw,file.filename=1.raw,id=colo1 \
307
- -drive if=xxx,id=topxxx,driver=replication,mode=secondary,top-id=topxxx\
308
+ -drive if=none,id=childs1,driver=replication,mode=secondary,top-id=childs1,\
309
file.file.filename=active_disk.qcow2,\
310
file.driver=qcow2,\
311
file.backing.file.filename=hidden_disk.qcow2,\
312
file.backing.driver=qcow2,\
313
file.backing.backing=colo1
314
+ -drive if=xxx,driver=quorum,read-pattern=fifo,id=top-disk1,\
315
+ vote-threshold=1,children.0=childs1
316
317
Then run qmp command in secondary qemu:
318
{ 'execute': 'nbd-server-start',
319
@@ -XXX,XX +XXX,XX @@ Secondary:
320
The primary host is down, so we should do the following thing:
321
{ 'execute': 'nbd-server-stop' }
322
323
+Promote Secondary to Primary:
324
+ see COLO-FT.txt
325
+
326
TODO:
327
-1. Continuous block replication
328
-2. Shared disk
329
+1. Shared disk
330
--
331
2.5.0
332
333
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
From: Brad Smith <brad@comstyle.com>
2
2
3
The L2TPv3 RFC number is 3931:
3
OpenBSD added support for tap(4) 10 releases ago.
4
https://tools.ietf.org/html/rfc3931
5
4
6
Reported-by: Henrik Johansson <henrikjohansson@rocketmail.com>
5
Remove the special casing for older releases.
7
Reviewed-by: Stefan Weil <sw@weilnetz.de>
6
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Brad Smith <brad@comstyle.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
9
---
11
qemu-options.hx | 4 ++--
10
net/tap-bsd.c | 8 --------
12
1 file changed, 2 insertions(+), 2 deletions(-)
11
1 file changed, 8 deletions(-)
13
12
14
diff --git a/qemu-options.hx b/qemu-options.hx
13
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/qemu-options.hx
15
--- a/net/tap-bsd.c
17
+++ b/qemu-options.hx
16
+++ b/net/tap-bsd.c
18
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
17
@@ -XXX,XX +XXX,XX @@
19
" Linux kernel 3.3+ as well as most routers can talk\n"
18
#include <net/if_tap.h>
20
" L2TPv3. This transport allows connecting a VM to a VM,\n"
19
#endif
21
" VM to a router and even VM to Host. It is a nearly-universal\n"
20
22
- " standard (RFC3391). Note - this implementation uses static\n"
21
-#if defined(__OpenBSD__)
23
+ " standard (RFC3931). Note - this implementation uses static\n"
22
-#include <sys/param.h>
24
" pre-configured tunnels (same as the Linux kernel).\n"
23
-#endif
25
" use 'src=' to specify source address\n"
24
-
26
" use 'dst=' to specify destination address\n"
25
#ifndef __FreeBSD__
27
@@ -XXX,XX +XXX,XX @@ Example (send packets from host's 1.2.3.4):
26
int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
28
@end example
27
int vnet_hdr_required, int mq_required, Error **errp)
29
28
@@ -XXX,XX +XXX,XX @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
30
@item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
29
if (*ifname) {
31
-Configure a L2TPv3 pseudowire host network backend. L2TPv3 (RFC3391) is a
30
snprintf(dname, sizeof dname, "/dev/%s", ifname);
32
+Configure a L2TPv3 pseudowire host network backend. L2TPv3 (RFC3931) is a
31
} else {
33
popular protocol to transport Ethernet (and other Layer 2) data frames between
32
-#if defined(__OpenBSD__) && OpenBSD < 201605
34
two systems. It is present in routers, firewalls and the Linux kernel
33
- snprintf(dname, sizeof dname, "/dev/tun%d", i);
35
(from version 3.3 onwards).
34
-#else
35
snprintf(dname, sizeof dname, "/dev/tap%d", i);
36
-#endif
37
}
38
TFR(fd = open(dname, O_RDWR));
39
if (fd >= 0) {
36
--
40
--
37
2.5.0
41
2.7.4
38
42
39
43
diff view generated by jsdifflib