1 | The following changes since commit a04d91c701251a9b32b7364ddb48029ba024cb75: | 1 | The following changes since commit 23895cbd82be95428e90168b12e925d0d3ca2f06: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/alistair/tags/pull-device-tree-20190327' into staging (2019-03-28 12:39:43 +0000) | 3 | Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20201123.0' into staging (2020-11-23 18:51:13 +0000) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the git repository at: |
6 | 6 | ||
7 | https://github.com/jasowang/qemu.git tags/net-pull-request | 7 | https://github.com/jasowang/qemu.git tags/net-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to ab79237a15e8f8c23310291b672d83374cf17935: | 9 | for you to fetch changes up to 9925990d01a92564af55f6f69d0f5f59b47609b1: |
10 | 10 | ||
11 | net: tap: use qemu_set_nonblock (2019-03-29 15:22:18 +0800) | 11 | net: Use correct default-path macro for downscript (2020-11-24 10:40:17 +0800) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | 14 | ||
15 | ---------------------------------------------------------------- | 15 | ---------------------------------------------------------------- |
16 | Li Qiang (1): | 16 | Keqian Zhu (1): |
17 | net: tap: use qemu_set_nonblock | 17 | net: Use correct default-path macro for downscript |
18 | 18 | ||
19 | Marc-André Lureau (1): | 19 | Paolo Bonzini (1): |
20 | net/socket: learn to talk with a unix dgram socket | 20 | net: do not exit on "netdev_add help" monitor command |
21 | 21 | ||
22 | Zhang Chen (1): | 22 | Prasad J Pandit (1): |
23 | MAINTAINERS: Update the latest email address | 23 | hw/net/e1000e: advance desc_offset in case of null descriptor |
24 | 24 | ||
25 | yuchenlin (1): | 25 | Yuri Benditovich (1): |
26 | e1000: Delay flush queue when receive RCTL | 26 | net: purge queued rx packets on queue deletion |
27 | 27 | ||
28 | MAINTAINERS | 2 +- | 28 | yuanjungong (1): |
29 | hw/net/e1000.c | 24 ++++++++++++++++++++++-- | 29 | tap: fix a memory leak |
30 | net/socket.c | 25 +++++++++++++++++++++---- | 30 | |
31 | net/tap.c | 10 +++++----- | 31 | hw/net/e1000e_core.c | 8 +++--- |
32 | 4 files changed, 49 insertions(+), 12 deletions(-) | 32 | include/net/net.h | 1 + |
33 | monitor/hmp-cmds.c | 6 ++++ | ||
34 | net/net.c | 80 +++++++++++++++++++++++++++------------------------- | ||
35 | net/tap.c | 5 +++- | ||
36 | 5 files changed, 57 insertions(+), 43 deletions(-) | ||
33 | 37 | ||
34 | 38 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Prasad J Pandit <pjp@fedoraproject.org> | ||
1 | 2 | ||
3 | While receiving packets via e1000e_write_packet_to_guest() routine, | ||
4 | 'desc_offset' is advanced only when RX descriptor is processed. And | ||
5 | RX descriptor is not processed if it has NULL buffer address. | ||
6 | This may lead to an infinite loop condition. Increment 'desc_offset' | ||
7 | to process next descriptor in the ring to avoid infinite loop. | ||
8 | |||
9 | Reported-by: Cheol-woo Myung <330cjfdn@gmail.com> | ||
10 | Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org> | ||
11 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
12 | --- | ||
13 | hw/net/e1000e_core.c | 8 ++++---- | ||
14 | 1 file changed, 4 insertions(+), 4 deletions(-) | ||
15 | |||
16 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/hw/net/e1000e_core.c | ||
19 | +++ b/hw/net/e1000e_core.c | ||
20 | @@ -XXX,XX +XXX,XX @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, | ||
21 | (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); | ||
22 | } | ||
23 | } | ||
24 | - desc_offset += desc_size; | ||
25 | - if (desc_offset >= total_size) { | ||
26 | - is_last = true; | ||
27 | - } | ||
28 | } else { /* as per intel docs; skip descriptors with null buf addr */ | ||
29 | trace_e1000e_rx_null_descriptor(); | ||
30 | } | ||
31 | + desc_offset += desc_size; | ||
32 | + if (desc_offset >= total_size) { | ||
33 | + is_last = true; | ||
34 | + } | ||
35 | |||
36 | e1000e_write_rx_descr(core, desc, is_last ? core->rx_pkt : NULL, | ||
37 | rss_info, do_ps ? ps_hdr_len : 0, &bastate.written); | ||
38 | -- | ||
39 | 2.7.4 | ||
40 | |||
41 | diff view generated by jsdifflib |
1 | From: yuchenlin <yuchenlin@synology.com> | 1 | From: Paolo Bonzini <pbonzini@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Due to a too-early RXT0 interrupt, win10x32 may hang while booting. | 3 | "netdev_add help" is causing QEMU to exit because the code that |
4 | This problem can be reproduced by doing power cycle on win10x32 guest. | 4 | invokes show_netdevs is shared between CLI and HMP processing. |
5 | In our environment, we stress power cycling on 10 win10x32 guests. | 5 | Move the check to the callers so that exit(0) remains only |
6 | The problem happens after about 20 rounds. | 6 | in the CLI flow. |
7 | 7 | ||
8 | Below shows some log with comment: | 8 | "netdev_add help" is not fixed by this patch; that is left for |
9 | later work. | ||
9 | 10 | ||
10 | The normal case: | 11 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> |
11 | |||
12 | 22831@1551928392.984687:e1000x_rx_disabled Received packet dropped | ||
13 | because receive is disabled RCTL = 0 | ||
14 | 22831@1551928392.985655:e1000x_rx_disabled Received packet dropped | ||
15 | because receive is disabled RCTL = 0 | ||
16 | 22831@1551928392.985801:e1000x_rx_disabled Received packet dropped | ||
17 | because receive is disabled RCTL = 0 | ||
18 | e1000: set_ics 0, ICR 0, IMR 0 | ||
19 | e1000: set_ics 0, ICR 0, IMR 0 | ||
20 | e1000: set_ics 0, ICR 0, IMR 0 | ||
21 | e1000: RCTL: 0, mac_reg[RCTL] = 0x0 | ||
22 | 22831@1551928393.056710:e1000x_rx_disabled Received packet dropped | ||
23 | because receive is disabled RCTL = 0 | ||
24 | e1000: set_ics 0, ICR 0, IMR 0 | ||
25 | e1000: ICR read: 0 | ||
26 | e1000: set_ics 0, ICR 0, IMR 0 | ||
27 | e1000: set_ics 0, ICR 0, IMR 0 | ||
28 | e1000: RCTL: 0, mac_reg[RCTL] = 0x0 | ||
29 | 22831@1551928393.077548:e1000x_rx_disabled Received packet dropped | ||
30 | because receive is disabled RCTL = 0 | ||
31 | e1000: set_ics 0, ICR 0, IMR 0 | ||
32 | e1000: ICR read: 0 | ||
33 | e1000: set_ics 2, ICR 0, IMR 0 | ||
34 | e1000: set_ics 2, ICR 2, IMR 0 | ||
35 | e1000: RCTL: 0, mac_reg[RCTL] = 0x0 | ||
36 | 22831@1551928393.102974:e1000x_rx_disabled Received packet dropped | ||
37 | because receive is disabled RCTL = 0 | ||
38 | 22831@1551928393.103267:e1000x_rx_disabled Received packet dropped | ||
39 | because receive is disabled RCTL = 0 | ||
40 | e1000: RCTL: 255, mac_reg[RCTL] = 0x40002 <- win10x32 says it can handle | ||
41 | RX now | ||
42 | e1000: set_ics 0, ICR 2, IMR 9d <- unmask interrupt | ||
43 | e1000: RCTL: 255, mac_reg[RCTL] = 0x48002 | ||
44 | e1000: set_ics 80, ICR 2, IMR 9d <- interrupt and work! | ||
45 | ... | ||
46 | |||
47 | The bad case: | ||
48 | |||
49 | 27744@1551930483.117766:e1000x_rx_disabled Received packet dropped | ||
50 | because receive is disabled RCTL = 0 | ||
51 | 27744@1551930483.118398:e1000x_rx_disabled Received packet dropped | ||
52 | because receive is disabled RCTL = 0 | ||
53 | e1000: set_ics 0, ICR 0, IMR 0 | ||
54 | e1000: set_ics 0, ICR 0, IMR 0 | ||
55 | e1000: set_ics 0, ICR 0, IMR 0 | ||
56 | e1000: RCTL: 0, mac_reg[RCTL] = 0x0 | ||
57 | 27744@1551930483.198063:e1000x_rx_disabled Received packet dropped | ||
58 | because receive is disabled RCTL = 0 | ||
59 | e1000: set_ics 0, ICR 0, IMR 0 | ||
60 | e1000: ICR read: 0 | ||
61 | e1000: set_ics 0, ICR 0, IMR 0 | ||
62 | e1000: set_ics 0, ICR 0, IMR 0 | ||
63 | e1000: RCTL: 0, mac_reg[RCTL] = 0x0 | ||
64 | 27744@1551930483.218675:e1000x_rx_disabled Received packet dropped | ||
65 | because receive is disabled RCTL = 0 | ||
66 | e1000: set_ics 0, ICR 0, IMR 0 | ||
67 | e1000: ICR read: 0 | ||
68 | e1000: set_ics 2, ICR 0, IMR 0 | ||
69 | e1000: set_ics 2, ICR 2, IMR 0 | ||
70 | e1000: RCTL: 0, mac_reg[RCTL] = 0x0 | ||
71 | 27744@1551930483.241768:e1000x_rx_disabled Received packet dropped | ||
72 | because receive is disabled RCTL = 0 | ||
73 | 27744@1551930483.241979:e1000x_rx_disabled Received packet dropped | ||
74 | because receive is disabled RCTL = 0 | ||
75 | e1000: RCTL: 255, mac_reg[RCTL] = 0x40002 <- win10x32 says it can handle | ||
76 | RX now | ||
77 | e1000: set_ics 80, ICR 2, IMR 0 <- flush queue (caused by setting RCTL) | ||
78 | e1000: set_ics 0, ICR 82, IMR 9d <- unmask interrupt and because 0x82&0x9d | ||
79 | != 0 generate interrupt, hang on here... | ||
80 | |||
81 | To work around this problem, simply delay flushing the queue. Also stop | ||
82 | receiving while the flush timer is pending. | ||
83 | |||
84 | Tested on CentOS, Win7SP1x64 and Win10x32. | ||
85 | |||
86 | Signed-off-by: yuchenlin <yuchenlin@synology.com> | ||
87 | Reviewed-by: Dmitry Fleytman <dmitry.fleytman@gmail.com> | ||
88 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 12 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
89 | --- | 13 | --- |
90 | hw/net/e1000.c | 24 ++++++++++++++++++++++-- | 14 | include/net/net.h | 1 + |
91 | 1 file changed, 22 insertions(+), 2 deletions(-) | 15 | monitor/hmp-cmds.c | 6 +++++ |
16 | net/net.c | 68 +++++++++++++++++++++++++++--------------------------- | ||
17 | 3 files changed, 41 insertions(+), 34 deletions(-) | ||
92 | 18 | ||
93 | diff --git a/hw/net/e1000.c b/hw/net/e1000.c | 19 | diff --git a/include/net/net.h b/include/net/net.h |
94 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
95 | --- a/hw/net/e1000.c | 21 | --- a/include/net/net.h |
96 | +++ b/hw/net/e1000.c | 22 | +++ b/include/net/net.h |
97 | @@ -XXX,XX +XXX,XX @@ typedef struct E1000State_st { | 23 | @@ -XXX,XX +XXX,XX @@ extern const char *host_net_devices[]; |
98 | bool mit_irq_level; /* Tracks interrupt pin level. */ | 24 | |
99 | uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */ | 25 | /* from net.c */ |
100 | 26 | int net_client_parse(QemuOptsList *opts_list, const char *str); | |
101 | + QEMUTimer *flush_queue_timer; | 27 | +void show_netdevs(void); |
28 | int net_init_clients(Error **errp); | ||
29 | void net_check_clients(void); | ||
30 | void net_cleanup(void); | ||
31 | diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/monitor/hmp-cmds.c | ||
34 | +++ b/monitor/hmp-cmds.c | ||
35 | @@ -XXX,XX +XXX,XX @@ | ||
36 | #include "qemu/option.h" | ||
37 | #include "qemu/timer.h" | ||
38 | #include "qemu/sockets.h" | ||
39 | +#include "qemu/help_option.h" | ||
40 | #include "monitor/monitor-internal.h" | ||
41 | #include "qapi/error.h" | ||
42 | #include "qapi/clone-visitor.h" | ||
43 | @@ -XXX,XX +XXX,XX @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict) | ||
44 | { | ||
45 | Error *err = NULL; | ||
46 | QemuOpts *opts; | ||
47 | + const char *type = qdict_get_try_str(qdict, "type"); | ||
48 | |||
49 | + if (type && is_help_option(type)) { | ||
50 | + show_netdevs(); | ||
51 | + return; | ||
52 | + } | ||
53 | opts = qemu_opts_from_qdict(qemu_find_opts("netdev"), qdict, &err); | ||
54 | if (err) { | ||
55 | goto out; | ||
56 | diff --git a/net/net.c b/net/net.c | ||
57 | index XXXXXXX..XXXXXXX 100644 | ||
58 | --- a/net/net.c | ||
59 | +++ b/net/net.c | ||
60 | @@ -XXX,XX +XXX,XX @@ | ||
61 | #include "qemu/config-file.h" | ||
62 | #include "qemu/ctype.h" | ||
63 | #include "qemu/iov.h" | ||
64 | +#include "qemu/qemu-print.h" | ||
65 | #include "qemu/main-loop.h" | ||
66 | #include "qemu/option.h" | ||
67 | #include "qapi/error.h" | ||
68 | @@ -XXX,XX +XXX,XX @@ static int net_client_init1(const Netdev *netdev, bool is_netdev, Error **errp) | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | -static void show_netdevs(void) | ||
73 | +void show_netdevs(void) | ||
74 | { | ||
75 | int idx; | ||
76 | const char *available_netdevs[] = { | ||
77 | @@ -XXX,XX +XXX,XX @@ static void show_netdevs(void) | ||
78 | #endif | ||
79 | }; | ||
80 | |||
81 | - printf("Available netdev backend types:\n"); | ||
82 | + qemu_printf("Available netdev backend types:\n"); | ||
83 | for (idx = 0; idx < ARRAY_SIZE(available_netdevs); idx++) { | ||
84 | - puts(available_netdevs[idx]); | ||
85 | + qemu_printf("%s\n", available_netdevs[idx]); | ||
86 | } | ||
87 | } | ||
88 | |||
89 | @@ -XXX,XX +XXX,XX @@ static int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp) | ||
90 | int ret = -1; | ||
91 | Visitor *v = opts_visitor_new(opts); | ||
92 | |||
93 | - const char *type = qemu_opt_get(opts, "type"); | ||
94 | - | ||
95 | - if (is_netdev && type && is_help_option(type)) { | ||
96 | - show_netdevs(); | ||
97 | - exit(0); | ||
98 | - } else { | ||
99 | - /* Parse convenience option format ip6-net=fec0::0[/64] */ | ||
100 | - const char *ip6_net = qemu_opt_get(opts, "ipv6-net"); | ||
101 | + /* Parse convenience option format ip6-net=fec0::0[/64] */ | ||
102 | + const char *ip6_net = qemu_opt_get(opts, "ipv6-net"); | ||
103 | |||
104 | - if (ip6_net) { | ||
105 | - char *prefix_addr; | ||
106 | - unsigned long prefix_len = 64; /* Default 64bit prefix length. */ | ||
107 | + if (ip6_net) { | ||
108 | + char *prefix_addr; | ||
109 | + unsigned long prefix_len = 64; /* Default 64bit prefix length. */ | ||
110 | |||
111 | - substrings = g_strsplit(ip6_net, "/", 2); | ||
112 | - if (!substrings || !substrings[0]) { | ||
113 | - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "ipv6-net", | ||
114 | - "a valid IPv6 prefix"); | ||
115 | - goto out; | ||
116 | - } | ||
117 | + substrings = g_strsplit(ip6_net, "/", 2); | ||
118 | + if (!substrings || !substrings[0]) { | ||
119 | + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "ipv6-net", | ||
120 | + "a valid IPv6 prefix"); | ||
121 | + goto out; | ||
122 | + } | ||
123 | |||
124 | - prefix_addr = substrings[0]; | ||
125 | + prefix_addr = substrings[0]; | ||
126 | |||
127 | - /* Handle user-specified prefix length. */ | ||
128 | - if (substrings[1] && | ||
129 | - qemu_strtoul(substrings[1], NULL, 10, &prefix_len)) | ||
130 | - { | ||
131 | - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, | ||
132 | - "ipv6-prefixlen", "a number"); | ||
133 | - goto out; | ||
134 | - } | ||
135 | - | ||
136 | - qemu_opt_set(opts, "ipv6-prefix", prefix_addr, &error_abort); | ||
137 | - qemu_opt_set_number(opts, "ipv6-prefixlen", prefix_len, | ||
138 | - &error_abort); | ||
139 | - qemu_opt_unset(opts, "ipv6-net"); | ||
140 | + /* Handle user-specified prefix length. */ | ||
141 | + if (substrings[1] && | ||
142 | + qemu_strtoul(substrings[1], NULL, 10, &prefix_len)) | ||
143 | + { | ||
144 | + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, | ||
145 | + "ipv6-prefixlen", "a number"); | ||
146 | + goto out; | ||
147 | } | ||
102 | + | 148 | + |
103 | /* Compatibility flags for migration to/from qemu 1.3.0 and older */ | 149 | + qemu_opt_set(opts, "ipv6-prefix", prefix_addr, &error_abort); |
104 | #define E1000_FLAG_AUTONEG_BIT 0 | 150 | + qemu_opt_set_number(opts, "ipv6-prefixlen", prefix_len, |
105 | #define E1000_FLAG_MIT_BIT 1 | 151 | + &error_abort); |
106 | @@ -XXX,XX +XXX,XX @@ static void e1000_reset(void *opaque) | 152 | + qemu_opt_unset(opts, "ipv6-net"); |
107 | 153 | } | |
108 | timer_del(d->autoneg_timer); | 154 | |
109 | timer_del(d->mit_timer); | 155 | /* Create an ID for -net if the user did not specify one */ |
110 | + timer_del(d->flush_queue_timer); | 156 | @@ -XXX,XX +XXX,XX @@ static int net_init_client(void *dummy, QemuOpts *opts, Error **errp) |
111 | d->mit_timer_on = 0; | 157 | |
112 | d->mit_irq_level = 0; | 158 | static int net_init_netdev(void *dummy, QemuOpts *opts, Error **errp) |
113 | d->mit_ide = 0; | 159 | { |
114 | @@ -XXX,XX +XXX,XX @@ set_ctrl(E1000State *s, int index, uint32_t val) | 160 | + const char *type = qemu_opt_get(opts, "type"); |
161 | + | ||
162 | + if (type && is_help_option(type)) { | ||
163 | + show_netdevs(); | ||
164 | + exit(0); | ||
165 | + } | ||
166 | return net_client_init(opts, true, errp); | ||
115 | } | 167 | } |
116 | 168 | ||
117 | static void | ||
118 | +e1000_flush_queue_timer(void *opaque) | ||
119 | +{ | ||
120 | + E1000State *s = opaque; | ||
121 | + | ||
122 | + qemu_flush_queued_packets(qemu_get_queue(s->nic)); | ||
123 | +} | ||
124 | + | ||
125 | +static void | ||
126 | set_rx_control(E1000State *s, int index, uint32_t val) | ||
127 | { | ||
128 | s->mac_reg[RCTL] = val; | ||
129 | @@ -XXX,XX +XXX,XX @@ set_rx_control(E1000State *s, int index, uint32_t val) | ||
130 | s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1; | ||
131 | DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT], | ||
132 | s->mac_reg[RCTL]); | ||
133 | - qemu_flush_queued_packets(qemu_get_queue(s->nic)); | ||
134 | + timer_mod(s->flush_queue_timer, | ||
135 | + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); | ||
136 | } | ||
137 | |||
138 | static void | ||
139 | @@ -XXX,XX +XXX,XX @@ e1000_can_receive(NetClientState *nc) | ||
140 | E1000State *s = qemu_get_nic_opaque(nc); | ||
141 | |||
142 | return e1000x_rx_ready(&s->parent_obj, s->mac_reg) && | ||
143 | - e1000_has_rxbufs(s, 1); | ||
144 | + e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer); | ||
145 | } | ||
146 | |||
147 | static uint64_t rx_desc_base(E1000State *s) | ||
148 | @@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) | ||
149 | return -1; | ||
150 | } | ||
151 | |||
152 | + if (timer_pending(s->flush_queue_timer)) { | ||
153 | + return 0; | ||
154 | + } | ||
155 | + | ||
156 | /* Pad to minimum Ethernet frame length */ | ||
157 | if (size < sizeof(min_buf)) { | ||
158 | iov_to_buf(iov, iovcnt, 0, min_buf, size); | ||
159 | @@ -XXX,XX +XXX,XX @@ pci_e1000_uninit(PCIDevice *dev) | ||
160 | timer_free(d->autoneg_timer); | ||
161 | timer_del(d->mit_timer); | ||
162 | timer_free(d->mit_timer); | ||
163 | + timer_del(d->flush_queue_timer); | ||
164 | + timer_free(d->flush_queue_timer); | ||
165 | qemu_del_nic(d->nic); | ||
166 | } | ||
167 | |||
168 | @@ -XXX,XX +XXX,XX @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) | ||
169 | |||
170 | d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d); | ||
171 | d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d); | ||
172 | + d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, | ||
173 | + e1000_flush_queue_timer, d); | ||
174 | } | ||
175 | |||
176 | static void qdev_e1000_reset(DeviceState *dev) | ||
177 | -- | 169 | -- |
178 | 2.5.0 | 170 | 2.7.4 |
179 | 171 | ||
180 | 172 | diff view generated by jsdifflib |
1 | From: Marc-André Lureau <marcandre.lureau@redhat.com> | 1 | From: Yuri Benditovich <yuri.benditovich@daynix.com> |
---|---|---|---|
2 | 2 | ||
3 | -net socket has a fd argument, and may be passed pre-opened sockets. | 3 | https://bugzilla.redhat.com/show_bug.cgi?id=1829272 |
4 | When deleting queue pair, purge pending RX packets if any. | ||
5 | Example of problematic flow: | ||
6 | 1. Bring up q35 VM with tap (vhost off) and virtio-net or e1000e | ||
7 | 2. Run ping flood to the VM NIC ( 1 ms interval) | ||
8 | 3. Hot unplug the NIC device (device_del) | ||
9 | During unplug process one or more packets come, the NIC | ||
10 | can't receive, tap disables read_poll | ||
11 | 4. Hot plug the device (device_add) with the same netdev | ||
12 | The tap stays with read_poll disabled and does not receive | ||
13 | any packets anymore (tap_send never triggered) | ||
4 | 14 | ||
5 | TCP sockets use framing. | 15 | Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com> |
6 | UDP sockets have datagram boundaries. | ||
7 | |||
8 | When given a unix dgram socket, it will be able to read from it, but | ||
9 | will attempt to send on the dgram_dst, which is unset. The other end | ||
10 | will not receive the data. | ||
11 | |||
12 | Let's teach -net socket to recognize a UNIX DGRAM socket, and use the | ||
13 | regular send() command (without dgram_dst). | ||
14 | |||
15 | This makes running slirp out-of-process possible that | ||
16 | way (python pseudo-code): | ||
17 | |||
18 | a, b = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM) | ||
19 | |||
20 | subprocess.Popen('qemu -net socket,fd=%d -net user' % a.fileno(), shell=True) | ||
21 | subprocess.Popen('qemu ... -net nic -net socket,fd=%d' % b.fileno(), shell=True) | ||
22 | |||
23 | Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com> | ||
24 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 16 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
25 | --- | 17 | --- |
26 | net/socket.c | 25 +++++++++++++++++++++---- | 18 | net/net.c | 12 ++++++++---- |
27 | 1 file changed, 21 insertions(+), 4 deletions(-) | 19 | 1 file changed, 8 insertions(+), 4 deletions(-) |
28 | 20 | ||
29 | diff --git a/net/socket.c b/net/socket.c | 21 | diff --git a/net/net.c b/net/net.c |
30 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
31 | --- a/net/socket.c | 23 | --- a/net/net.c |
32 | +++ b/net/socket.c | 24 | +++ b/net/net.c |
33 | @@ -XXX,XX +XXX,XX @@ static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf, | 25 | @@ -XXX,XX +XXX,XX @@ void qemu_del_nic(NICState *nic) |
34 | ssize_t ret; | 26 | |
35 | 27 | qemu_macaddr_set_free(&nic->conf->macaddr); | |
36 | do { | 28 | |
37 | - ret = qemu_sendto(s->fd, buf, size, 0, | 29 | - /* If this is a peer NIC and peer has already been deleted, free it now. */ |
38 | - (struct sockaddr *)&s->dgram_dst, | 30 | - if (nic->peer_deleted) { |
39 | - sizeof(s->dgram_dst)); | 31 | - for (i = 0; i < queues; i++) { |
40 | + if (s->dgram_dst.sin_family != AF_UNIX) { | 32 | - qemu_free_net_client(qemu_get_subqueue(nic, i)->peer); |
41 | + ret = qemu_sendto(s->fd, buf, size, 0, | 33 | + for (i = 0; i < queues; i++) { |
42 | + (struct sockaddr *)&s->dgram_dst, | 34 | + NetClientState *nc = qemu_get_subqueue(nic, i); |
43 | + sizeof(s->dgram_dst)); | 35 | + /* If this is a peer NIC and peer has already been deleted, free it now. */ |
44 | + } else { | 36 | + if (nic->peer_deleted) { |
45 | + ret = send(s->fd, buf, size, 0); | 37 | + qemu_free_net_client(nc->peer); |
46 | + } | 38 | + } else if (nc->peer) { |
47 | } while (ret == -1 && errno == EINTR); | 39 | + /* if there are RX packets pending, complete them */ |
48 | 40 | + qemu_purge_queued_packets(nc->peer); | |
49 | if (ret == -1 && errno == EAGAIN) { | 41 | } |
50 | @@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, | ||
51 | int newfd; | ||
52 | NetClientState *nc; | ||
53 | NetSocketState *s; | ||
54 | + SocketAddress *sa; | ||
55 | + SocketAddressType sa_type; | ||
56 | + | ||
57 | + sa = socket_local_address(fd, errp); | ||
58 | + if (!sa) { | ||
59 | + return NULL; | ||
60 | + } | ||
61 | + sa_type = sa->type; | ||
62 | + qapi_free_SocketAddress(sa); | ||
63 | |||
64 | /* fd passed: multicast: "learn" dgram_dst address from bound address and save it | ||
65 | * Because this may be "shared" socket from a "master" process, datagrams would be recv() | ||
66 | @@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, | ||
67 | "socket: fd=%d (cloned mcast=%s:%d)", | ||
68 | fd, inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); | ||
69 | } else { | ||
70 | + if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) { | ||
71 | + s->dgram_dst.sin_family = AF_UNIX; | ||
72 | + } | ||
73 | + | ||
74 | snprintf(nc->info_str, sizeof(nc->info_str), | ||
75 | - "socket: fd=%d", fd); | ||
76 | + "socket: fd=%d %s", fd, SocketAddressType_str(sa_type)); | ||
77 | } | 42 | } |
78 | 43 | ||
79 | return s; | ||
80 | -- | 44 | -- |
81 | 2.5.0 | 45 | 2.7.4 |
82 | 46 | ||
83 | 47 | diff view generated by jsdifflib |
1 | From: Li Qiang <liq3ea@gmail.com> | 1 | From: yuanjungong <ruc_gongyuanjun@163.com> |
---|---|---|---|
2 | 2 | ||
3 | The fcntl will change the flags directly, use qemu_set_nonblock() | 3 | Close fd before returning. |
4 | instead. | ||
5 | 4 | ||
6 | Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> | 5 | Buglink: https://bugs.launchpad.net/qemu/+bug/1904486 |
7 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 6 | |
8 | Signed-off-by: Li Qiang <liq3ea@gmail.com> | 7 | Signed-off-by: yuanjungong <ruc_gongyuanjun@163.com> |
8 | Reviewed-by: Peter Maydell <peter.maydell@linaro.org> | ||
9 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 9 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
10 | --- | 10 | --- |
11 | net/tap.c | 10 +++++----- | 11 | net/tap.c | 2 ++ |
12 | 1 file changed, 5 insertions(+), 5 deletions(-) | 12 | 1 file changed, 2 insertions(+) |
13 | 13 | ||
14 | diff --git a/net/tap.c b/net/tap.c | 14 | diff --git a/net/tap.c b/net/tap.c |
15 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/net/tap.c | 16 | --- a/net/tap.c |
17 | +++ b/net/tap.c | 17 | +++ b/net/tap.c |
18 | @@ -XXX,XX +XXX,XX @@ int net_init_bridge(const Netdev *netdev, const char *name, | ||
19 | return -1; | ||
20 | } | ||
21 | |||
22 | - fcntl(fd, F_SETFL, O_NONBLOCK); | ||
23 | + qemu_set_nonblock(fd); | ||
24 | vnet_hdr = tap_probe_vnet_hdr(fd); | ||
25 | s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); | ||
26 | |||
27 | @@ -XXX,XX +XXX,XX @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, | ||
28 | } | ||
29 | return; | ||
30 | } | ||
31 | - fcntl(vhostfd, F_SETFL, O_NONBLOCK); | ||
32 | + qemu_set_nonblock(vhostfd); | ||
33 | } | ||
34 | options.opaque = (void *)(uintptr_t)vhostfd; | ||
35 | |||
36 | @@ -XXX,XX +XXX,XX @@ int net_init_tap(const Netdev *netdev, const char *name, | 18 | @@ -XXX,XX +XXX,XX @@ int net_init_tap(const Netdev *netdev, const char *name, |
19 | if (ret < 0) { | ||
20 | error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", | ||
21 | name, fd); | ||
22 | + close(fd); | ||
37 | return -1; | 23 | return -1; |
38 | } | 24 | } |
39 | 25 | ||
40 | - fcntl(fd, F_SETFL, O_NONBLOCK); | ||
41 | + qemu_set_nonblock(fd); | ||
42 | |||
43 | vnet_hdr = tap_probe_vnet_hdr(fd); | ||
44 | |||
45 | @@ -XXX,XX +XXX,XX @@ int net_init_tap(const Netdev *netdev, const char *name, | 26 | @@ -XXX,XX +XXX,XX @@ int net_init_tap(const Netdev *netdev, const char *name, |
46 | goto free_fail; | 27 | vhostfdname, vnet_hdr, fd, &err); |
47 | } | 28 | if (err) { |
48 | 29 | error_propagate(errp, err); | |
49 | - fcntl(fd, F_SETFL, O_NONBLOCK); | 30 | + close(fd); |
50 | + qemu_set_nonblock(fd); | ||
51 | |||
52 | if (i == 0) { | ||
53 | vnet_hdr = tap_probe_vnet_hdr(fd); | ||
54 | @@ -XXX,XX +XXX,XX @@ free_fail: | ||
55 | return -1; | 31 | return -1; |
56 | } | 32 | } |
57 | 33 | } else if (tap->has_fds) { | |
58 | - fcntl(fd, F_SETFL, O_NONBLOCK); | ||
59 | + qemu_set_nonblock(fd); | ||
60 | vnet_hdr = tap_probe_vnet_hdr(fd); | ||
61 | |||
62 | net_init_tap_one(tap, peer, "bridge", name, ifname, | ||
63 | -- | 34 | -- |
64 | 2.5.0 | 35 | 2.7.4 |
65 | 36 | ||
66 | 37 | diff view generated by jsdifflib |
1 | From: Zhang Chen <chen.zhang@intel.com> | 1 | From: Keqian Zhu <zhukeqian1@huawei.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Zhang Chen <chen.zhang@intel.com> | 3 | Fixes: 63c4db4c2e6d (net: relocate paths to helpers and scripts) |
4 | Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> | ||
4 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 5 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
5 | --- | 6 | --- |
6 | MAINTAINERS | 2 +- | 7 | net/tap.c | 3 ++- |
7 | 1 file changed, 1 insertion(+), 1 deletion(-) | 8 | 1 file changed, 2 insertions(+), 1 deletion(-) |
8 | 9 | ||
9 | diff --git a/MAINTAINERS b/MAINTAINERS | 10 | diff --git a/net/tap.c b/net/tap.c |
10 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/MAINTAINERS | 12 | --- a/net/tap.c |
12 | +++ b/MAINTAINERS | 13 | +++ b/net/tap.c |
13 | @@ -XXX,XX +XXX,XX @@ F: include/migration/failover.h | 14 | @@ -XXX,XX +XXX,XX @@ free_fail: |
14 | F: docs/COLO-FT.txt | 15 | script = default_script = get_relocated_path(DEFAULT_NETWORK_SCRIPT); |
15 | 16 | } | |
16 | COLO Proxy | 17 | if (!downscript) { |
17 | -M: Zhang Chen <zhangckid@gmail.com> | 18 | - downscript = default_downscript = get_relocated_path(DEFAULT_NETWORK_SCRIPT); |
18 | +M: Zhang Chen <chen.zhang@intel.com> | 19 | + downscript = default_downscript = |
19 | M: Li Zhijian <lizhijian@cn.fujitsu.com> | 20 | + get_relocated_path(DEFAULT_NETWORK_DOWN_SCRIPT); |
20 | S: Supported | 21 | } |
21 | F: docs/colo-proxy.txt | 22 | |
23 | if (tap->has_ifname) { | ||
22 | -- | 24 | -- |
23 | 2.5.0 | 25 | 2.7.4 |
24 | 26 | ||
25 | 27 | diff view generated by jsdifflib |