[RFC v3 5/7] net/filter-redirector: add AF_PACKET inject path

Cindy Lu posted 7 patches 3 days ago
[RFC v3 5/7] net/filter-redirector: add AF_PACKET inject path
Posted by Cindy Lu 3 days ago
Allow redirector packets to be injected back into a vhost-backed TAP
backend without going through the normal peer queue.

Export a redirector helper that sends an iov directly through the
redirector's AF_PACKET inject socket, and teach qemu_netfilter_pass_to_next()
to use it when the sender is backed by vhost. This lets the redirector
reinject traffic into the TAP device even when the regular userspace path
is bypassed by vhost.

The redirector chardev transport may prepend an all-zero virtio-net header
to the Ethernet frame. When the redirector itself has vnet_hdr disabled but
the sender uses a vnet header, detect the longest supported all-zero header
and strip it before sendmsg() so the AF_PACKET inject side always writes a
raw L2 frame back to TAP.

Propagate sendmsg() failures through ssize_t so the caller can report
partial/failed injection cleanly.

Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 include/net/filter.h |  6 +++
 net/filter-mirror.c  | 94 ++++++++++++++++++++++++++++++++++++++++++++
 net/filter.c         | 11 +++++-
 3 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/include/net/filter.h b/include/net/filter.h
index f0673c9163..68076a66ad 100644
--- a/include/net/filter.h
+++ b/include/net/filter.h
@@ -91,6 +91,12 @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
                                     int iovcnt,
                                     void *opaque);
 
+bool qemu_netfilter_redirector_inject_netdev(NetFilterState *nf,
+                                             NetClientState *sender,
+                                             const struct iovec *iov,
+                                             int iovcnt,
+                                             ssize_t *retp);
+
 void colo_notify_filters_event(int event, Error **errp);
 
 #endif /* QEMU_NET_FILTER_H */
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
index e114ddb7d1..165a7ee1ea 100644
--- a/net/filter-mirror.c
+++ b/net/filter-mirror.c
@@ -74,6 +74,100 @@ typedef struct FilterSendCo {
     int ret;
 } FilterSendCo;
 
+/*
+ * Return the length of an all-zero virtio-net header found at the start of
+ * the packet in @iov, or 0 if there is none.  Candidates are tried in table
+ * order (largest layout first) and must be shorter than the whole packet.
+ */
+static size_t filter_redirector_wrapped_vnet_hdr_len(const struct iovec *iov,
+                                                     int iovcnt)
+{
+    static const size_t candidates[] = {
+        sizeof(struct virtio_net_hdr_v1_hash_tunnel),
+        sizeof(struct virtio_net_hdr_v1_hash),
+        sizeof(struct virtio_net_hdr_mrg_rxbuf),
+        sizeof(struct virtio_net_hdr),
+    };
+    uint8_t prefix[sizeof(struct virtio_net_hdr_v1_hash_tunnel)] = { 0 };
+    const uint8_t zeroes[sizeof(prefix)] = { 0 };
+    const size_t pkt_len = iov_size(iov, iovcnt);
+
+    for (int idx = 0; idx < ARRAY_SIZE(candidates); idx++) {
+        size_t cand = candidates[idx];
+        if (cand < pkt_len &&
+            iov_to_buf(iov, iovcnt, 0, prefix, cand) == cand &&
+            memcmp(prefix, zeroes, cand) == 0) {
+            return cand;
+        }
+    }
+    return 0;
+}
+
+
+/*
+ * Send the packet in @iov directly through the redirector's out_netfd.
+ *
+ * Returns false, leaving @retp untouched, if @nf is not a filter-redirector,
+ * its out_netfd is negative, or @sender is not the netdev @nf is attached
+ * to.  Otherwise returns true with @retp set to the number of bytes sent,
+ * to -errno if sendmsg() failed, or to -EIO on a short send.
+ */
+bool qemu_netfilter_redirector_inject_netdev(NetFilterState *nf,
+                                             NetClientState *sender,
+                                             const struct iovec *iov,
+                                             int iovcnt,
+                                             ssize_t *retp)
+{
+    MirrorState *s;
+    struct iovec frame = { 0 };
+    struct msghdr msg = { 0 };
+    size_t expected, strip_len = 0;
+    ssize_t sent;
+
+    if (!object_dynamic_cast(OBJECT(nf), TYPE_FILTER_REDIRECTOR)) {
+        return false;
+    }
+
+    s = FILTER_REDIRECTOR(nf);
+    if (s->out_netfd < 0 || !sender || sender != nf->netdev) {
+        return false;
+    }
+
+    /* Probe only if the sender has a vnet header and s->vnet_hdr is off */
+    if (!s->vnet_hdr && qemu_get_vnet_hdr_len(sender)) {
+        strip_len = filter_redirector_wrapped_vnet_hdr_len(iov, iovcnt);
+    }
+
+    expected = iov_size(iov, iovcnt) - strip_len;
+    if (strip_len) {
+        /* Copy the frame without the zero header into one flat buffer */
+        frame.iov_len = expected;
+        frame.iov_base = g_malloc(expected);
+        iov_to_buf(iov, iovcnt, strip_len, frame.iov_base, expected);
+        msg.msg_iov = &frame;
+        msg.msg_iovlen = 1;
+    } else {
+        msg.msg_iov = (struct iovec *)iov;
+        msg.msg_iovlen = iovcnt;
+    }
+
+    do {
+        sent = sendmsg(s->out_netfd, &msg, 0);
+    } while (sent < 0 && errno == EINTR);
+
+    if (sent < 0) {
+        *retp = -errno;
+    } else if ((size_t)sent != expected) {
+        *retp = -EIO;
+    } else {
+        *retp = sent;
+    }
+
+    g_free(frame.iov_base);
+
+    return true;
+}
+
 static int _filter_send(MirrorState *s,
                        char *buf,
                        ssize_t size)
diff --git a/net/filter.c b/net/filter.c
index 76345c1a9d..847dd34698 100644
--- a/net/filter.c
+++ b/net/filter.c
@@ -81,7 +81,7 @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
                                     int iovcnt,
                                     void *opaque)
 {
-    int ret = 0;
+    ssize_t ret = 0;
     int direction;
     NetFilterState *nf = opaque;
     NetFilterState *next = NULL;
@@ -124,6 +124,15 @@ ssize_t qemu_netfilter_pass_to_next(NetClientState *sender,
      * deleted while we go through filters.
      */
     if (sender && sender->peer) {
+        if (get_vhost_net(sender) &&
+            qemu_netfilter_redirector_inject_netdev(nf, sender,
+                                                    iov, iovcnt, &ret)) {
+            if (ret < 0) {
+                error_report("filter redirector AF_PACKET send failed(%s)",
+                             strerror(-ret));
+            }
+            goto out;
+        }
         qemu_net_queue_send_iov(sender->peer->incoming_queue,
                                 sender, flags, iov, iovcnt, NULL);
     }
-- 
2.52.0