[RFC v4 5/5] chardev/socket: add AF_PACKET capture path

Cindy Lu posted 5 patches 4 days, 13 hours ago
[RFC v4 5/5] chardev/socket: add AF_PACKET capture path
Posted by Cindy Lu 4 days, 13 hours ago
Add the AF_PACKET capture read path for socket chardevs. When opened
with af-packet-mode=capture, the read side drains raw frames with
recvfrom(), keeps only PACKET_OUTGOING traffic, prefixes each frame
with its length as a 32-bit big-endian value, and feeds the result
through the normal chardev frontend interface.

Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 chardev/char-socket.c | 133 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 131 insertions(+), 2 deletions(-)

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 45d06fda8f..76a51a853d 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -107,9 +107,17 @@ static void tcp_chr_accept(QIONetListener *listener,
 
 static int tcp_chr_read_poll(void *opaque);
 static void tcp_chr_disconnect_locked(Chardev *chr);
+static void tcp_chr_deliver_af_packet(Chardev *chr);
 
 #define TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE 65536
 
+static bool
+tcp_chr_uses_af_packet_capture(SocketChardev *s)
+{
+    return s->is_af_packet && s->af_packet_mode_set &&
+           s->af_packet_mode == CHARDEV_SOCKET_AF_PACKET_MODE_CAPTURE;
+}
+
 static bool tcp_chr_uses_af_packet_inject(SocketChardev *s)
 {
     return s->is_af_packet &&
@@ -300,6 +308,9 @@ static int tcp_chr_read_poll(void *opaque)
         return 0;
     }
     s->max_size = qemu_chr_be_can_write(chr);
+    if (tcp_chr_uses_af_packet_capture(s) && s->af_packet_buf_len) {
+        tcp_chr_deliver_af_packet(chr);
+    }
     return s->max_size;
 }
 
@@ -500,6 +511,98 @@ static void tcp_chr_reset_af_packet_send(SocketChardev *s)
     s->af_packet_send_len_bytes = 0;
 }
 
+/* Push buffered AF_PACKET capture data into the chardev frontend. */
+static void
+tcp_chr_deliver_af_packet(Chardev *chr)
+{
+    SocketChardev *s = SOCKET_CHARDEV(chr);
+
+    while (s->max_size > 0 && s->af_packet_buf_offset < s->af_packet_buf_len) {
+        size_t remaining = s->af_packet_buf_len - s->af_packet_buf_offset;
+        size_t chunk = MIN((size_t)s->max_size, remaining);
+
+        qemu_chr_be_write(chr, s->af_packet_buf + s->af_packet_buf_offset,
+                          (int)chunk);
+        s->af_packet_buf_offset += chunk;
+        s->max_size = qemu_chr_be_can_write(chr);
+    }
+
+    if (s->af_packet_buf_offset == s->af_packet_buf_len) {
+        tcp_chr_reset_af_packet_buf(s);
+    }
+}
+
+/* Copy buffered AF_PACKET capture data into a synchronous read buffer. */
+static int tcp_chr_copy_af_packet_buf(SocketChardev *s, uint8_t *buf,
+                                      int len) {
+    size_t remaining = s->af_packet_buf_len - s->af_packet_buf_offset;
+    size_t copied = MIN((size_t)len, remaining);
+
+    memcpy(buf, s->af_packet_buf + s->af_packet_buf_offset, copied);
+    s->af_packet_buf_offset += copied;
+
+    if (s->af_packet_buf_offset == s->af_packet_buf_len) {
+        tcp_chr_reset_af_packet_buf(s);
+    }
+
+    return (int)copied;
+}
+
+static ssize_t
+tcp_chr_capture_af_packet(Chardev *chr)
+{
+#ifdef CONFIG_LINUX
+    SocketChardev *s = SOCKET_CHARDEV(chr);
+    struct sockaddr_ll sll;
+    socklen_t sll_len;
+    ssize_t size;
+    uint32_t len;
+
+    if (!tcp_chr_uses_af_packet_capture(s)) {
+        errno = EIO;
+        return -1;
+    }
+
+    if (s->af_packet_buf_size <
+        sizeof(len) + TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE) {
+        s->af_packet_buf =
+            g_realloc(s->af_packet_buf,
+                      sizeof(len) + TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE);
+        s->af_packet_buf_size =
+            sizeof(len) + TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE;
+    }
+
+    for (;;) {
+        sll_len = sizeof(sll);
+        do {
+            size = recvfrom(s->sioc->fd, s->af_packet_buf + sizeof(len),
+                            TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE, 0,
+                            (struct sockaddr *)&sll, &sll_len);
+        } while (size < 0 && errno == EINTR);
+
+        if (size <= 0) {
+            if (size < 0 && errno != EAGAIN && errno != EWOULDBLOCK) {
+                trace_chr_socket_recv_err(chr, chr->label, g_strerror(errno));
+            }
+            return size;
+        }
+
+        if (sll.sll_pkttype != PACKET_OUTGOING) {
+            continue;
+        }
+
+        len = htonl(size);
+        memcpy(s->af_packet_buf, &len, sizeof(len));
+        s->af_packet_buf_len = sizeof(len) + size;
+        s->af_packet_buf_offset = 0;
+        return (ssize_t)s->af_packet_buf_len;
+    }
+#else
+    errno = EPROTONOSUPPORT;
+    return -1;
+#endif
+}
+
 static GSource *tcp_chr_add_watch(Chardev *chr, GIOCondition cond)
 {
     SocketChardev *s = SOCKET_CHARDEV(chr);
@@ -682,6 +785,22 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
     if (len > s->max_size) {
         len = s->max_size;
     }
+    if (tcp_chr_uses_af_packet_capture(s)) {
+        tcp_chr_deliver_af_packet(chr);
+        if (s->max_size <= 0 || s->af_packet_buf_len) {
+            return TRUE;
+        }
+
+        size = tcp_chr_capture_af_packet(chr);
+        if (size == 0 || (size == -1 && errno != EAGAIN)) {
+            tcp_chr_disconnect(chr);
+        } else if (size > 0) {
+            tcp_chr_deliver_af_packet(chr);
+        }
+
+        return TRUE;
+    }
+
     size = tcp_chr_recv(chr, (void *)buf, len);
     if (size == 0 || (size == -1 && errno != EAGAIN)) {
         /* connection closed */
@@ -715,6 +834,10 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
     int saved_errno;
     Error *local_err = NULL;
 
+    if (tcp_chr_uses_af_packet_capture(s) && s->af_packet_buf_len) {
+        return tcp_chr_copy_af_packet_buf(s, (uint8_t *)buf, len);
+    }
+
     if (s->state != TCP_CHARDEV_STATE_CONNECTED) {
         return 0;
     }
@@ -723,7 +846,14 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
         error_report_err(local_err);
         return -1;
     }
-    size = tcp_chr_recv(chr, (void *) buf, len);
+    if (tcp_chr_uses_af_packet_capture(s)) {
+        size = tcp_chr_capture_af_packet(chr);
+        if (size > 0) {
+            size = tcp_chr_copy_af_packet_buf(s, (uint8_t *)buf, len);
+        }
+    } else {
+        size = tcp_chr_recv(chr, (void *)buf, len);
+    }
     saved_errno = errno;
     if (s->state != TCP_CHARDEV_STATE_DISCONNECTED) {
         if (!qio_channel_set_blocking(s->ioc, false, &local_err)) {
@@ -1448,7 +1578,6 @@ static gboolean socket_reconnect_timeout(gpointer opaque)
     return false;
 }
 
-
 static int qmp_chardev_open_socket_server(Chardev *chr,
                                           bool is_telnet,
                                           bool is_waitconnect,
-- 
2.52.0
Re: [RFC v4 5/5] chardev/socket: add AF_PACKET capture path
Posted by Daniel P. Berrangé 3 days, 6 hours ago
On Tue, Apr 07, 2026 at 01:05:52PM +0800, Cindy Lu wrote:
> Add the AF_PACKET capture read path for socket chardevs. When opened
> with af-packet-mode=capture, the read side drains raw frames with
> recvfrom(), keeps only PACKET_OUTGOING traffic, and feeds the result
> through the normal chardev frontend interface.
> 
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  chardev/char-socket.c | 133 +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 131 insertions(+), 2 deletions(-)
> 
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index 45d06fda8f..76a51a853d 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -107,9 +107,17 @@ static void tcp_chr_accept(QIONetListener *listener,
>  
>  static int tcp_chr_read_poll(void *opaque);
>  static void tcp_chr_disconnect_locked(Chardev *chr);
> +static void tcp_chr_deliver_af_packet(Chardev *chr);
>  
>  #define TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE 65536
>  
> +static bool
> +tcp_chr_uses_af_packet_capture(SocketChardev *s)
> +{
> +    return s->is_af_packet && s->af_packet_mode_set &&
> +           s->af_packet_mode == CHARDEV_SOCKET_AF_PACKET_MODE_CAPTURE;
> +}
> +
>  static bool tcp_chr_uses_af_packet_inject(SocketChardev *s)
>  {
>      return s->is_af_packet &&
> @@ -300,6 +308,9 @@ static int tcp_chr_read_poll(void *opaque)
>          return 0;
>      }
>      s->max_size = qemu_chr_be_can_write(chr);
> +    if (tcp_chr_uses_af_packet_capture(s) && s->af_packet_buf_len) {
> +        tcp_chr_deliver_af_packet(chr);
> +    }
>      return s->max_size;
>  }
>  
> @@ -500,6 +511,98 @@ static void tcp_chr_reset_af_packet_send(SocketChardev *s)
>      s->af_packet_send_len_bytes = 0;
>  }
>  
> +/* Push buffered AF_PACKET capture data into the chardev frontend. */
> +static void
> +tcp_chr_deliver_af_packet(Chardev *chr)
> +{
> +    SocketChardev *s = SOCKET_CHARDEV(chr);
> +
> +    while (s->max_size > 0 && s->af_packet_buf_offset < s->af_packet_buf_len) {
> +        size_t remaining = s->af_packet_buf_len - s->af_packet_buf_offset;
> +        size_t chunk = MIN((size_t)s->max_size, remaining);
> +
> +        qemu_chr_be_write(chr, s->af_packet_buf + s->af_packet_buf_offset,
> +                          (int)chunk);
> +        s->af_packet_buf_offset += chunk;
> +        s->max_size = qemu_chr_be_can_write(chr);
> +    }
> +
> +    if (s->af_packet_buf_offset == s->af_packet_buf_len) {
> +        tcp_chr_reset_af_packet_buf(s);
> +    }
> +}
> +
> +/* Copy buffered AF_PACKET capture data into a synchronous read buffer. */
> +static int tcp_chr_copy_af_packet_buf(SocketChardev *s, uint8_t *buf,
> +                                      int len) {
> +    size_t remaining = s->af_packet_buf_len - s->af_packet_buf_offset;
> +    size_t copied = MIN((size_t)len, remaining);
> +
> +    memcpy(buf, s->af_packet_buf + s->af_packet_buf_offset, copied);
> +    s->af_packet_buf_offset += copied;
> +
> +    if (s->af_packet_buf_offset == s->af_packet_buf_len) {
> +        tcp_chr_reset_af_packet_buf(s);
> +    }
> +
> +    return (int)copied;
> +}
> +
> +static ssize_t
> +tcp_chr_capture_af_packet(Chardev *chr)
> +{
> +#ifdef CONFIG_LINUX
> +    SocketChardev *s = SOCKET_CHARDEV(chr);
> +    struct sockaddr_ll sll;
> +    socklen_t sll_len;
> +    ssize_t size;
> +    uint32_t len;
> +
> +    if (!tcp_chr_uses_af_packet_capture(s)) {
> +        errno = EIO;
> +        return -1;
> +    }
> +
> +    if (s->af_packet_buf_size <
> +        sizeof(len) + TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE) {
> +        s->af_packet_buf =
> +            g_realloc(s->af_packet_buf,
> +                      sizeof(len) + TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE);
> +        s->af_packet_buf_size =
> +            sizeof(len) + TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE;
> +    }
> +
> +    for (;;) {
> +        sll_len = sizeof(sll);
> +        do {
> +            size = recvfrom(s->sioc->fd, s->af_packet_buf + sizeof(len),
> +                            TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE, 0,
> +                            (struct sockaddr *)&sll, &sll_len);
> +        } while (size < 0 && errno == EINTR);
> +
> +        if (size <= 0) {
> +            if (size < 0 && errno != EAGAIN && errno != EWOULDBLOCK) {
> +                trace_chr_socket_recv_err(chr, chr->label, g_strerror(errno));
> +            }
> +            return size;
> +        }
> +
> +        if (sll.sll_pkttype != PACKET_OUTGOING) {
> +            continue;
> +        }
> +
> +        len = htonl(size);
> +        memcpy(s->af_packet_buf, &len, sizeof(len));
> +        s->af_packet_buf_len = sizeof(len) + size;
> +        s->af_packet_buf_offset = 0;
> +        return (ssize_t)s->af_packet_buf_len;
> +    }
> +#else
> +    errno = EPROTONOSUPPORT;
> +    return -1;
> +#endif
> +}
> +
>  static GSource *tcp_chr_add_watch(Chardev *chr, GIOCondition cond)
>  {
>      SocketChardev *s = SOCKET_CHARDEV(chr);
> @@ -682,6 +785,22 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
>      if (len > s->max_size) {
>          len = s->max_size;
>      }
> +    if (tcp_chr_uses_af_packet_capture(s)) {
> +        tcp_chr_deliver_af_packet(chr);
> +        if (s->max_size <= 0 || s->af_packet_buf_len) {
> +            return TRUE;
> +        }
> +
> +        size = tcp_chr_capture_af_packet(chr);
> +        if (size == 0 || (size == -1 && errno != EAGAIN)) {
> +            tcp_chr_disconnect(chr);
> +        } else if (size > 0) {
> +            tcp_chr_deliver_af_packet(chr);
> +        }
> +
> +        return TRUE;
> +    }
> +
>      size = tcp_chr_recv(chr, (void *)buf, len);
>      if (size == 0 || (size == -1 && errno != EAGAIN)) {
>          /* connection closed */
> @@ -715,6 +834,10 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
>      int saved_errno;
>      Error *local_err = NULL;
>  
> +    if (tcp_chr_uses_af_packet_capture(s) && s->af_packet_buf_len) {
> +        return tcp_chr_copy_af_packet_buf(s, (uint8_t *)buf, len);
> +    }
> +
>      if (s->state != TCP_CHARDEV_STATE_CONNECTED) {
>          return 0;
>      }
> @@ -723,7 +846,14 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
>          error_report_err(local_err);
>          return -1;
>      }
> -    size = tcp_chr_recv(chr, (void *) buf, len);
> +    if (tcp_chr_uses_af_packet_capture(s)) {
> +        size = tcp_chr_capture_af_packet(chr);
> +        if (size > 0) {
> +            size = tcp_chr_copy_af_packet_buf(s, (uint8_t *)buf, len);
> +        }
> +    } else {
> +        size = tcp_chr_recv(chr, (void *)buf, len);
> +    }


As with the send side, I don't really think we should have this
packet re-assembly logic in the chardev code. We should just be
calling the normal qio_channel_read APIs and let the netfilter code
re-assemble the packets it gets from the chardev. Mostly, it seems we
would just use TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE instead of
CHR_READ_BUF_LEN in the existing code paths.

>      saved_errno = errno;
>      if (s->state != TCP_CHARDEV_STATE_DISCONNECTED) {
>          if (!qio_channel_set_blocking(s->ioc, false, &local_err)) {
> @@ -1448,7 +1578,6 @@ static gboolean socket_reconnect_timeout(gpointer opaque)
>      return false;
>  }
>  
> -
>  static int qmp_chardev_open_socket_server(Chardev *chr,
>                                            bool is_telnet,
>                                            bool is_waitconnect,
> -- 
> 2.52.0
> 
> 

With regards,
Daniel
-- 
|: https://berrange.com       ~~        https://hachyderm.io/@berrange :|
|: https://libvirt.org          ~~          https://entangle-photo.org :|
|: https://pixelfed.art/berrange   ~~    https://fstop138.berrange.com :|