Implement the initialization logic for AF_PACKET based netdev
endpoints in filter-redirector:
1. filter_redirector_netdev_setup(): Creates and binds AF_PACKET
sockets to the specified TAP interface. For in_netdev, the socket
is used to receive packets; for out_netdev, it is used to send.
2. filter_redirector_netdev_read(): Async handler for reading packets
from the in_netdev AF_PACKET socket. Packets are forwarded through
the redirector chain.
3. Updated cleanup to close both AF_PACKET sockets (unregistering the
in_netdev fd handler first); the receive buffer is freed in the
instance finalizer.
4. Modified allow_send_when_stopped logic to consider both chardev
and netdev output endpoints, and to only enable when the
redirector is active (status=on).
5. VM state change handler now manages the AF_PACKET read handler
activation based on VM running state and enable_when_stopped.
Signed-off-by: Cindy Lu <lulu@redhat.com>
---
net/filter-mirror.c | 241 ++++++++++++++++++++++++++++++++++++++++----
1 file changed, 224 insertions(+), 17 deletions(-)
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
index 37035f3892..f8001612ec 100644
--- a/net/filter-mirror.c
+++ b/net/filter-mirror.c
@@ -26,6 +26,13 @@
#include "qemu/sockets.h"
#include "block/aio-wait.h"
#include "system/runstate.h"
+#include "net/tap.h"
+#include "net/tap_int.h"
+
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/if_packet.h>
+#include <netinet/if_ether.h>
typedef struct MirrorState MirrorState;
DECLARE_INSTANCE_CHECKER(MirrorState, FILTER_MIRROR,
@@ -42,6 +49,10 @@ struct MirrorState {
char *outdev;
char *in_netdev;
char *out_netdev;
+ NetClientState *out_net;
+ int in_netfd;
+ uint8_t *in_netbuf;
+ int out_netfd;
CharFrontend chr_in;
CharFrontend chr_out;
SocketReadState rs;
@@ -172,6 +183,17 @@ static int redirector_chr_can_read(void *opaque)
return REDIRECTOR_MAX_LEN;
}
+static bool filter_redirector_input_active(NetFilterState *nf, bool enable)
+{
+ MirrorState *s = FILTER_REDIRECTOR(nf);
+
+ if (!enable) {
+ return false;
+ }
+
+ return runstate_is_running() || s->enable_when_stopped;
+}
+
static void redirector_chr_read(void *opaque, const uint8_t *buf, int size)
{
NetFilterState *nf = opaque;
@@ -208,6 +230,40 @@ static void redirector_chr_event(void *opaque, QEMUChrEvent event)
}
}
+static void filter_redirector_netdev_read(void *opaque)
+{
+ NetFilterState *nf = opaque;
+ MirrorState *s = FILTER_REDIRECTOR(nf);
+ struct sockaddr_ll sll;
+ socklen_t sll_len;
+ ssize_t len;
+
+ if (!s->in_netbuf || s->in_netfd < 0) {
+ return;
+ }
+
+ for (;;) {
+ sll_len = sizeof(sll);
+ len = recvfrom(s->in_netfd, s->in_netbuf, REDIRECTOR_MAX_LEN, 0,
+ (struct sockaddr *)&sll, &sll_len);
+ if (len <= 0) {
+ break;
+ }
+
+ if (sll.sll_pkttype != PACKET_OUTGOING) {
+ continue;
+ }
+
+ redirector_to_filter(nf, s->in_netbuf, len);
+ }
+
+ if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK &&
+ errno != EINTR) {
+ error_report("filter redirector read in_netdev failed(%s)",
+ strerror(errno));
+ }
+}
+
static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
NetClientState *sender,
unsigned flags,
@@ -268,7 +324,19 @@ static void filter_redirector_cleanup(NetFilterState *nf)
qemu_chr_fe_deinit(&s->chr_in, false);
qemu_chr_fe_deinit(&s->chr_out, false);
- qemu_del_vm_change_state_handler(s->vmsentry);
+ if (s->vmsentry) {
+ qemu_del_vm_change_state_handler(s->vmsentry);
+ s->vmsentry = NULL;
+ }
+ if (s->in_netfd >= 0) {
+ qemu_set_fd_handler(s->in_netfd, NULL, NULL, NULL);
+ close(s->in_netfd);
+ s->in_netfd = -1;
+ }
+ if (s->out_netfd >= 0) {
+ close(s->out_netfd);
+ s->out_netfd = -1;
+ }
if (nf->netdev) {
nf->netdev->allow_send_when_stopped = 0;
@@ -320,13 +388,13 @@ filter_redirector_refresh_allow_send_when_stopped(NetFilterState *nf)
/*
* Allow sending when stopped if enable_when_stopped is set and we have
- * an outdev. This must be independent of nf->on (status) so that packets
- * can still flow through the filter chain to other filters even when this
- * redirector is disabled. Otherwise, tap_send() will disable read_poll
- * when qemu_can_send_packet() returns false, preventing further packet
- * processing.
+ * a redirector output endpoint and the redirector is enabled.
+ * Keeping this active while redirector status=off can unexpectedly
+ * drain packets in migration stop windows and perturb vhost ring state.
*/
- nc->allow_send_when_stopped = (s->enable_when_stopped && s->outdev);
+ nc->allow_send_when_stopped = (nf->on &&
+ s->enable_when_stopped &&
+ (s->outdev || s->out_netdev));
}
static void filter_redirector_vm_state_change(void *opaque, bool running,
@@ -335,8 +403,16 @@ static void filter_redirector_vm_state_change(void *opaque, bool running,
NetFilterState *nf = opaque;
MirrorState *s = FILTER_REDIRECTOR(nf);
NetClientState *nc = nf->netdev;
+ bool active = filter_redirector_input_active(nf, nf->on);
+
+ if (s->in_netfd >= 0) {
+ qemu_set_fd_handler(s->in_netfd,
+ active ? filter_redirector_netdev_read : NULL,
+ NULL,
+ active ? nf : NULL);
+ }
- if (!running && s->enable_when_stopped && nc->info->read_poll) {
+ if (!running && nc && s->enable_when_stopped && nc->info->read_poll) {
nc->info->read_poll(nc, true);
}
}
@@ -362,21 +438,127 @@ static void filter_redirector_maybe_enable_read_poll(NetFilterState *nf)
}
}
+static bool filter_redirector_netdev_setup(MirrorState *s, Error **errp)
+{
+ struct sockaddr_ll sll = { 0 };
+ char ifname[IFNAMSIZ] = { 0 };
+ int ifindex;
+ int fd;
+ NetClientState *nc;
+
+ if (s->in_netdev) {
+ int tapfd;
+ nc = qemu_find_netdev(s->in_netdev);
+ if (!nc) {
+ error_setg(errp, "in_netdev '%s' not found", s->in_netdev);
+ return false;
+ }
+
+ if (nc->info->type != NET_CLIENT_DRIVER_TAP) {
+ error_setg(errp, "in_netdev '%s' must be a TAP netdev",
+ s->in_netdev);
+ return false;
+ }
+
+ tapfd = tap_get_fd(nc);
+ if (tapfd < 0 || tap_fd_get_ifname(tapfd, ifname) != 0) {
+ error_setg(errp, "failed to resolve TAP ifname for in_netdev '%s'",
+ s->in_netdev);
+ return false;
+ }
+ } else if (s->out_netdev) {
+ nc = qemu_find_netdev(s->out_netdev);
+ if (!nc) {
+ error_setg(errp, "out_netdev '%s' not found", s->out_netdev);
+ return false;
+ }
+ /*
+ * out_netdev always uses AF_PACKET. For TAP netdev we resolve the
+ * interface name from tap fd; for non-TAP netdev we interpret
+ * out_netdev string as host interface name.
+ */
+ if (nc->info->type == NET_CLIENT_DRIVER_TAP) {
+ int tapfd = tap_get_fd(nc);
+
+ if (tapfd < 0 || tap_fd_get_ifname(tapfd, ifname) != 0) {
+ error_setg(errp,
+ "failed to resolve TAP ifname for out_netdev '%s'",
+ s->out_netdev);
+ return false;
+ }
+ } else {
+ snprintf(ifname, sizeof(ifname), "%s", s->out_netdev);
+ }
+ }
+
+ ifindex = if_nametoindex(ifname);
+ if (!ifindex) {
+ error_setg_errno(errp, errno,
+ "failed to resolve ifindex for '%s'", ifname);
+ return false;
+ }
+
+ fd = qemu_socket(AF_PACKET, SOCK_RAW | SOCK_NONBLOCK, htons(ETH_P_ALL));
+ if (fd < 0) {
+ error_setg_errno(errp, errno, "failed to create AF_PACKET socket");
+ return false;
+ }
+
+ sll.sll_family = AF_PACKET;
+ sll.sll_ifindex = ifindex;
+ sll.sll_protocol = htons(ETH_P_ALL);
+ if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
+ error_setg_errno(errp, errno,
+ "failed to bind AF_PACKET socket for ifname '%s'",
+ ifname);
+ close(fd);
+ return false;
+ }
+
+ if (s->in_netdev) {
+ s->in_netfd = fd;
+ g_free(s->in_netbuf);
+ s->in_netbuf = g_malloc(REDIRECTOR_MAX_LEN);
+ } else {
+ s->out_netfd = fd;
+ s->out_net = nc;
+ }
+ return true;
+}
+
static void filter_redirector_setup(NetFilterState *nf, Error **errp)
{
MirrorState *s = FILTER_REDIRECTOR(nf);
Chardev *chr;
- if (!s->indev && !s->outdev) {
- error_setg(errp, "filter redirector needs 'indev' or "
- "'outdev' at least one property set");
+ if (!s->indev && !s->outdev && !s->in_netdev && !s->out_netdev) {
+ error_setg(errp, "filter redirector needs at least one of "
+ "'indev', 'outdev', 'in_netdev', or 'out_netdev'");
+ return;
+ }
+
+ if (s->indev && s->in_netdev) {
+ error_setg(errp, "'indev' and 'in_netdev' cannot both be set "
+ "for filter redirector");
+ return;
+ }
+
+ if (s->outdev && s->out_netdev) {
+ error_setg(errp, "'outdev' and 'out_netdev' cannot both be set "
+ "for filter redirector");
+ return;
+ }
+
+ if (s->in_netdev && s->out_netdev) {
+ error_setg(errp, "'in_netdev' and 'out_netdev' cannot both be set "
+ "for filter redirector");
+ return;
+ }
+
+ if (s->indev && s->outdev && !strcmp(s->indev, s->outdev)) {
+ error_setg(errp, "'indev' and 'outdev' could not be same "
+ "for filter redirector");
return;
- } else if (s->indev && s->outdev) {
- if (!strcmp(s->indev, s->outdev)) {
- error_setg(errp, "'indev' and 'outdev' could not be same "
- "for filter redirector");
- return;
- }
}
net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr);
@@ -412,9 +594,23 @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
}
}
+ if (s->out_netdev || s->in_netdev) {
+ if (!filter_redirector_netdev_setup(s, errp)) {
+ return;
+ }
+ }
+
s->vmsentry = qemu_add_vm_change_state_handler(
filter_redirector_vm_state_change, nf);
+ if (s->in_netfd >= 0) {
+ bool active = filter_redirector_input_active(nf, nf->on);
+
+ qemu_set_fd_handler(s->in_netfd,
+ active ? filter_redirector_netdev_read : NULL,
+ NULL,
+ active ? nf : NULL);
+ }
filter_redirector_maybe_enable_read_poll(nf);
filter_redirector_refresh_allow_send_when_stopped(nf);
@@ -423,6 +619,7 @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
static void filter_redirector_status_changed(NetFilterState *nf, Error **errp)
{
MirrorState *s = FILTER_REDIRECTOR(nf);
+ bool active = filter_redirector_input_active(nf, nf->on);
if (s->indev) {
if (nf->on) {
@@ -435,6 +632,13 @@ static void filter_redirector_status_changed(NetFilterState *nf, Error **errp)
}
}
+ if (s->in_netfd >= 0) {
+ qemu_set_fd_handler(s->in_netfd,
+ active ? filter_redirector_netdev_read : NULL,
+ NULL,
+ active ? nf : NULL);
+ }
+
if (nf->on) {
filter_redirector_maybe_enable_read_poll(nf);
}
@@ -665,6 +869,8 @@ static void filter_redirector_init(Object *obj)
MirrorState *s = FILTER_REDIRECTOR(obj);
s->vnet_hdr = false;
+ s->in_netfd = -1;
+ s->out_netfd = -1;
}
static void filter_mirror_fini(Object *obj)
@@ -682,6 +888,7 @@ static void filter_redirector_fini(Object *obj)
g_free(s->outdev);
g_free(s->in_netdev);
g_free(s->out_netdev);
+ g_free(s->in_netbuf);
}
static const TypeInfo filter_redirector_info = {
--
2.52.0
On Fri, Feb 13, 2026 at 3:11 PM Cindy Lu <lulu@redhat.com> wrote:
>
> Implement the initialization logic for AF_PACKET based netdev
> endpoints in filter-redirector:
>
> 1. filter_redirector_netdev_setup(): Creates and binds AF_PACKET
> sockets to the specified TAP interface. For in_netdev, the socket
> is used to receive packets; for out_netdev, it is used to send.
>
> 2. filter_redirector_netdev_read(): Async handler for reading packets
> from the in_netdev AF_PACKET socket. Packets are forwarded through
> the redirector chain.
>
> 3. Updated cleanup to properly close AF_PACKET sockets and free
> associated buffers.
>
> 4. Modified allow_send_when_stopped logic to consider both chardev
> and netdev output endpoints, and to only enable when the
> redirector is active (status=on).
>
> 5. VM state change handler now manages the AF_PACKET read handler
> activation based on VM running state and enable_when_stopped.
>
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
> net/filter-mirror.c | 241 ++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 224 insertions(+), 17 deletions(-)
>
> diff --git a/net/filter-mirror.c b/net/filter-mirror.c
> index 37035f3892..f8001612ec 100644
> --- a/net/filter-mirror.c
> +++ b/net/filter-mirror.c
> @@ -26,6 +26,13 @@
> #include "qemu/sockets.h"
> #include "block/aio-wait.h"
> #include "system/runstate.h"
> +#include "net/tap.h"
> +#include "net/tap_int.h"
> +
> +#include <sys/socket.h>
> +#include <net/if.h>
> +#include <linux/if_packet.h>
> +#include <netinet/if_ether.h>
>
> typedef struct MirrorState MirrorState;
> DECLARE_INSTANCE_CHECKER(MirrorState, FILTER_MIRROR,
> @@ -42,6 +49,10 @@ struct MirrorState {
> char *outdev;
> char *in_netdev;
> char *out_netdev;
> + NetClientState *out_net;
> + int in_netfd;
> + uint8_t *in_netbuf;
> + int out_netfd;
> CharFrontend chr_in;
> CharFrontend chr_out;
> SocketReadState rs;
> @@ -172,6 +183,17 @@ static int redirector_chr_can_read(void *opaque)
> return REDIRECTOR_MAX_LEN;
> }
>
> +static bool filter_redirector_input_active(NetFilterState *nf, bool enable)
> +{
> + MirrorState *s = FILTER_REDIRECTOR(nf);
> +
> + if (!enable) {
> + return false;
> + }
> +
> + return runstate_is_running() || s->enable_when_stopped;
> +}
> +
> static void redirector_chr_read(void *opaque, const uint8_t *buf, int size)
> {
> NetFilterState *nf = opaque;
> @@ -208,6 +230,40 @@ static void redirector_chr_event(void *opaque, QEMUChrEvent event)
> }
> }
>
> +static void filter_redirector_netdev_read(void *opaque)
> +{
> + NetFilterState *nf = opaque;
> + MirrorState *s = FILTER_REDIRECTOR(nf);
> + struct sockaddr_ll sll;
> + socklen_t sll_len;
> + ssize_t len;
> +
> + if (!s->in_netbuf || s->in_netfd < 0) {
> + return;
> + }
> +
> + for (;;) {
> + sll_len = sizeof(sll);
> + len = recvfrom(s->in_netfd, s->in_netbuf, REDIRECTOR_MAX_LEN, 0,
> + (struct sockaddr *)&sll, &sll_len);
> + if (len <= 0) {
> + break;
> + }
> +
> + if (sll.sll_pkttype != PACKET_OUTGOING) {
> + continue;
> + }
> +
> + redirector_to_filter(nf, s->in_netbuf, len);
> + }
> +
> + if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK &&
> + errno != EINTR) {
> + error_report("filter redirector read in_netdev failed(%s)",
> + strerror(errno));
> + }
> +}
> +
> static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
> NetClientState *sender,
> unsigned flags,
> @@ -268,7 +324,19 @@ static void filter_redirector_cleanup(NetFilterState *nf)
>
> qemu_chr_fe_deinit(&s->chr_in, false);
> qemu_chr_fe_deinit(&s->chr_out, false);
> - qemu_del_vm_change_state_handler(s->vmsentry);
> + if (s->vmsentry) {
> + qemu_del_vm_change_state_handler(s->vmsentry);
> + s->vmsentry = NULL;
> + }
> + if (s->in_netfd >= 0) {
> + qemu_set_fd_handler(s->in_netfd, NULL, NULL, NULL);
> + close(s->in_netfd);
> + s->in_netfd = -1;
> + }
> + if (s->out_netfd >= 0) {
> + close(s->out_netfd);
> + s->out_netfd = -1;
> + }
>
> if (nf->netdev) {
> nf->netdev->allow_send_when_stopped = 0;
> @@ -320,13 +388,13 @@ filter_redirector_refresh_allow_send_when_stopped(NetFilterState *nf)
>
> /*
> * Allow sending when stopped if enable_when_stopped is set and we have
> - * an outdev. This must be independent of nf->on (status) so that packets
> - * can still flow through the filter chain to other filters even when this
> - * redirector is disabled. Otherwise, tap_send() will disable read_poll
> - * when qemu_can_send_packet() returns false, preventing further packet
> - * processing.
> + * a redirector output endpoint and the redirector is enabled.
> + * Keeping this active while redirector status=off can unexpectedly
> + * drain packets in migration stop windows and perturb vhost ring state.
> */
> - nc->allow_send_when_stopped = (s->enable_when_stopped && s->outdev);
> + nc->allow_send_when_stopped = (nf->on &&
> + s->enable_when_stopped &&
> + (s->outdev || s->out_netdev));
> }
>
> static void filter_redirector_vm_state_change(void *opaque, bool running,
> @@ -335,8 +403,16 @@ static void filter_redirector_vm_state_change(void *opaque, bool running,
> NetFilterState *nf = opaque;
> MirrorState *s = FILTER_REDIRECTOR(nf);
> NetClientState *nc = nf->netdev;
> + bool active = filter_redirector_input_active(nf, nf->on);
> +
> + if (s->in_netfd >= 0) {
> + qemu_set_fd_handler(s->in_netfd,
> + active ? filter_redirector_netdev_read : NULL,
> + NULL,
> + active ? nf : NULL);
> + }
>
> - if (!running && s->enable_when_stopped && nc->info->read_poll) {
> + if (!running && nc && s->enable_when_stopped && nc->info->read_poll) {
> nc->info->read_poll(nc, true);
> }
> }
> @@ -362,21 +438,127 @@ static void filter_redirector_maybe_enable_read_poll(NetFilterState *nf)
> }
> }
>
> +static bool filter_redirector_netdev_setup(MirrorState *s, Error **errp)
> +{
> + struct sockaddr_ll sll = { 0 };
> + char ifname[IFNAMSIZ] = { 0 };
> + int ifindex;
> + int fd;
> + NetClientState *nc;
> +
> + if (s->in_netdev) {
> + int tapfd;
> + nc = qemu_find_netdev(s->in_netdev);
> + if (!nc) {
> + error_setg(errp, "in_netdev '%s' not found", s->in_netdev);
> + return false;
> + }
> +
> + if (nc->info->type != NET_CLIENT_DRIVER_TAP) {
> + error_setg(errp, "in_netdev '%s' must be a TAP netdev",
> + s->in_netdev);
> + return false;
> + }
This seems inelegant; the netfilter should not be coupled with a
specific netdev type. Any reason for doing this?
> +
> + tapfd = tap_get_fd(nc);
> + if (tapfd < 0 || tap_fd_get_ifname(tapfd, ifname) != 0) {
> + error_setg(errp, "failed to resolve TAP ifname for in_netdev '%s'",
> + s->in_netdev);
> + return false;
> + }
> + } else if (s->out_netdev) {
> + nc = qemu_find_netdev(s->out_netdev);
> + if (!nc) {
> + error_setg(errp, "out_netdev '%s' not found", s->out_netdev);
> + return false;
> + }
> + /*
> + * out_netdev always uses AF_PACKET.
I don't think I follow this: why does out_netdev always have to use AF_PACKET?
> + * For TAP netdev we resolve the
> + * interface name from tap fd; for non-TAP netdev we interpret
> + * out_netdev string as host interface name.
> + */
> + if (nc->info->type == NET_CLIENT_DRIVER_TAP) {
> + int tapfd = tap_get_fd(nc);
> +
> + if (tapfd < 0 || tap_fd_get_ifname(tapfd, ifname) != 0) {
> + error_setg(errp,
> + "failed to resolve TAP ifname for out_netdev '%s'",
> + s->out_netdev);
> + return false;
> + }
> + } else {
> + snprintf(ifname, sizeof(ifname), "%s", s->out_netdev);
> + }
> + }
> +
> + ifindex = if_nametoindex(ifname);
> + if (!ifindex) {
> + error_setg_errno(errp, errno,
> + "failed to resolve ifindex for '%s'", ifname);
> + return false;
> + }
> +
> + fd = qemu_socket(AF_PACKET, SOCK_RAW | SOCK_NONBLOCK, htons(ETH_P_ALL));
There may be a misunderstanding here: can we simply reuse the existing
"-netdev socket" instead of opening a separate packet socket?
Another concern: using a packet socket requires extra capabilities
(CAP_NET_RAW, CAP_NET_ADMIN, etc.).
> + if (fd < 0) {
> + error_setg_errno(errp, errno, "failed to create AF_PACKET socket");
> + return false;
> + }
> +
> + sll.sll_family = AF_PACKET;
> + sll.sll_ifindex = ifindex;
> + sll.sll_protocol = htons(ETH_P_ALL);
> + if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
> + error_setg_errno(errp, errno,
> + "failed to bind AF_PACKET socket for ifname '%s'",
> + ifname);
> + close(fd);
> + return false;
> + }
> +
> + if (s->in_netdev) {
> + s->in_netfd = fd;
> + g_free(s->in_netbuf);
> + s->in_netbuf = g_malloc(REDIRECTOR_MAX_LEN);
> + } else {
> + s->out_netfd = fd;
> + s->out_net = nc;
> + }
> + return true;
> +}
> +
Thanks
© 2016 - 2026 Red Hat, Inc.