Add the AF_PACKET inject write path for socket chardevs. When a socket
backend is opened with af-packet-mode=inject, tcp_chr_write() no longer
sends the redirector stream framing through QIOChannel. Instead it
parses the existing 4-byte length header, accumulates one complete
packet, and sends it as a single frame on the AF_PACKET fd.
Signed-off-by: Cindy Lu <lulu@redhat.com>
---
chardev/char-socket.c | 148 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 148 insertions(+)
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index c710fdb497..45d06fda8f 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -108,11 +108,159 @@ static void tcp_chr_accept(QIONetListener *listener,
static int tcp_chr_read_poll(void *opaque);
static void tcp_chr_disconnect_locked(Chardev *chr);
+#define TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE 65536
+
+/*
+ * Report whether writes on this socket chardev must take the AF_PACKET
+ * inject path: the backend is flagged as AF_PACKET, an af-packet mode
+ * has been recorded (af_packet_mode_set), and that mode is INJECT.
+ */
+static bool tcp_chr_uses_af_packet_inject(SocketChardev *s)
+{
+    if (!s->is_af_packet || !s->af_packet_mode_set) {
+        return false;
+    }
+
+    return s->af_packet_mode == CHARDEV_SOCKET_AF_PACKET_MODE_INJECT;
+}
+
+/*
+ * Hand one buffer to the AF_PACKET socket with a single sendmsg() call,
+ * repeating the call for as long as it is interrupted (EINTR).
+ *
+ * Returns the sendmsg() result: the number of bytes accepted, or -1 with
+ * errno set.  Fails with ENOTSOCK when there is no socket channel or its
+ * local address family is not AF_PACKET, and with EPROTONOSUPPORT on
+ * builds without CONFIG_LINUX.
+ */
+static ssize_t tcp_chr_send_af_packet(SocketChardev *s,
+                                      const uint8_t *buf,
+                                      size_t len)
+{
+#ifdef CONFIG_LINUX
+    struct iovec vec;
+    struct msghdr hdr = { 0 };
+    ssize_t sent;
+
+    if (s->sioc == NULL || s->sioc->localAddr.ss_family != AF_PACKET) {
+        errno = ENOTSOCK;
+        return -1;
+    }
+
+    /* iov_base is not const-qualified; the data is only read here. */
+    vec.iov_base = (void *)buf;
+    vec.iov_len = len;
+    hdr.msg_iov = &vec;
+    hdr.msg_iovlen = 1;
+
+    for (;;) {
+        sent = sendmsg(s->sioc->fd, &hdr, 0);
+        if (sent >= 0 || errno != EINTR) {
+            break;
+        }
+    }
+
+    return sent;
+#else
+    errno = EPROTONOSUPPORT;
+    return -1;
+#endif
+}
+
+/*
+ * Set up the per-chardev send state for a frame whose 4-byte length
+ * header has just been decoded.
+ *
+ * @frame_len: payload length taken from the header, in host byte order.
+ *
+ * Returns true when the state is ready: either the frame is empty and
+ * the parser is back to expecting a header, or the staging buffer is
+ * large enough and the parser is expecting @frame_len payload bytes.
+ * Returns false with errno = EMSGSIZE when @frame_len exceeds
+ * TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE.
+ */
+static bool tcp_chr_af_packet_prepare_send(SocketChardev *s, uint32_t frame_len)
+{
+    if (frame_len > TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE) {
+        /*
+         * Discard the rejected header.  Leaving it marked as complete
+         * would make the next write skip header parsing and act on
+         * whatever stale frame length is still recorded.
+         */
+        s->af_packet_send_len = 0;
+        s->af_packet_send_offset = 0;
+        s->af_packet_send_len_bytes = 0;
+        errno = EMSGSIZE;
+        return false;
+    }
+
+    if (frame_len == 0) {
+        /* Nothing to accumulate: go straight back to reading a header. */
+        s->af_packet_send_len = 0;
+        s->af_packet_send_offset = 0;
+        s->af_packet_send_len_bytes = 0;
+        return true;
+    }
+
+    /* The staging buffer only ever grows; it is reused across frames. */
+    if (s->af_packet_send_buf_size < frame_len) {
+        s->af_packet_send_buf = g_realloc(s->af_packet_send_buf, frame_len);
+        s->af_packet_send_buf_size = frame_len;
+    }
+
+    s->af_packet_send_len = frame_len;
+    s->af_packet_send_offset = 0;
+    /* Header fully consumed: subsequent bytes are payload. */
+    s->af_packet_send_len_bytes = sizeof(s->af_packet_send_len_buf);
+    return true;
+}
+
+/*
+ * Consume @len bytes of the length-prefixed stream and inject every
+ * frame that becomes complete onto the AF_PACKET socket.
+ *
+ * The stream is a sequence of records, each a 4-byte length in network
+ * byte order followed by that many payload bytes.  Records may be split
+ * arbitrarily across calls; partial headers and payloads are kept in
+ * the SocketChardev between calls.
+ *
+ * Returns:
+ *   @len            all bytes were consumed;
+ *   0 < n < @len    the socket would block; only the first n bytes
+ *                   (ending on a frame boundary) were consumed and the
+ *                   caller should resubmit the rest;
+ *   -1              nothing usable was consumed; errno is EAGAIN when
+ *                   the socket would block, EMSGSIZE for an oversize
+ *                   frame, EIO for a short send, or the sendmsg() error.
+ *
+ * The bytes of a frame that could not be sent because the socket would
+ * block are never reported as consumed, so a caller that retries the
+ * same data cannot duplicate a frame or desynchronise the parser.
+ */
+static int tcp_chr_inject_af_packet(Chardev *chr,
+                                    SocketChardev *s,
+                                    const uint8_t *buf,
+                                    int len)
+{
+    const size_t hdr_size = sizeof(s->af_packet_send_len_buf);
+    size_t total;
+    size_t offset = 0;
+    /*
+     * Parser state and input position at the point where this call
+     * started feeding the frame currently being assembled.  Used to undo
+     * the work if that frame cannot be sent yet.
+     */
+    size_t frame_start = 0;
+    size_t saved_len_bytes = s->af_packet_send_len_bytes;
+    size_t saved_send_len = s->af_packet_send_len;
+    size_t saved_send_offset = s->af_packet_send_offset;
+
+    /* A negative length must not be turned into a huge size_t bound. */
+    if (len <= 0) {
+        return 0;
+    }
+    total = (size_t)len;
+
+    while (offset < total) {
+        size_t copy;
+        ssize_t ret;
+        int saved_errno;
+
+        if (s->af_packet_send_len_bytes < hdr_size) {
+            uint32_t frame_len_be;
+
+            copy = MIN(hdr_size - s->af_packet_send_len_bytes,
+                       total - offset);
+            memcpy(s->af_packet_send_len_buf + s->af_packet_send_len_bytes,
+                   buf + offset, copy);
+            s->af_packet_send_len_bytes += copy;
+            offset += copy;
+
+            if (s->af_packet_send_len_bytes < hdr_size) {
+                /* Input exhausted in the middle of the header. */
+                continue;
+            }
+
+            memcpy(&frame_len_be, s->af_packet_send_len_buf,
+                   sizeof(frame_len_be));
+            if (!tcp_chr_af_packet_prepare_send(s, ntohl(frame_len_be))) {
+                saved_errno = errno;
+                /* Forget the rejected header so it is not reused. */
+                s->af_packet_send_len = 0;
+                s->af_packet_send_offset = 0;
+                s->af_packet_send_len_bytes = 0;
+                errno = saved_errno;
+                return -1;
+            }
+            if (s->af_packet_send_len == 0) {
+                /* Empty frame: nothing to send, next byte is a header. */
+                frame_start = offset;
+                saved_len_bytes = 0;
+                saved_send_len = 0;
+                saved_send_offset = 0;
+                continue;
+            }
+        }
+
+        copy = MIN(s->af_packet_send_len - s->af_packet_send_offset,
+                   total - offset);
+        memcpy(s->af_packet_send_buf + s->af_packet_send_offset,
+               buf + offset, copy);
+        s->af_packet_send_offset += copy;
+        offset += copy;
+
+        if (s->af_packet_send_offset < s->af_packet_send_len) {
+            /* Input exhausted in the middle of the payload. */
+            continue;
+        }
+
+        ret = tcp_chr_send_af_packet(s, s->af_packet_send_buf,
+                                     s->af_packet_send_len);
+
+        if (ret == (ssize_t)s->af_packet_send_len) {
+            /* Frame delivered; start over with the next header. */
+            s->af_packet_send_len = 0;
+            s->af_packet_send_offset = 0;
+            s->af_packet_send_len_bytes = 0;
+            frame_start = offset;
+            saved_len_bytes = 0;
+            saved_send_len = 0;
+            saved_send_offset = 0;
+            continue;
+        }
+
+        if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
+            /*
+             * Undo everything this call fed into the unsent frame and
+             * report only the bytes before it, so that the caller's
+             * retry replays exactly the bytes that were rolled back.
+             */
+            s->af_packet_send_len_bytes = saved_len_bytes;
+            s->af_packet_send_len = saved_send_len;
+            s->af_packet_send_offset = saved_send_offset;
+            if (frame_start > 0) {
+                return (int)frame_start;
+            }
+            errno = EAGAIN;
+            return -1;
+        }
+
+        /* Hard failure, or a datagram that was only partially accepted. */
+        saved_errno = ret < 0 ? errno : EIO;
+
+        /*
+         * Drop the frame: keeping it buffered would re-send it ahead of
+         * every later write and misparse the data that follows.
+         */
+        s->af_packet_send_len = 0;
+        s->af_packet_send_offset = 0;
+        s->af_packet_send_len_bytes = 0;
+
+        if (tcp_chr_read_poll(chr) <= 0) {
+            trace_chr_socket_poll_err(chr, chr->label);
+            tcp_chr_disconnect_locked(chr);
+        }
+        /* The calls above may have overwritten errno. */
+        errno = saved_errno;
+        return -1;
+    }
+
+    return len;
+}
+
/* Called with chr_write_lock held. */
static int tcp_chr_write(Chardev *chr, const uint8_t *buf, int len)
{
SocketChardev *s = SOCKET_CHARDEV(chr);
+ if (tcp_chr_uses_af_packet_inject(s)) {
+ return tcp_chr_inject_af_packet(chr, s, buf, len);
+ }
+
if (s->state == TCP_CHARDEV_STATE_CONNECTED) {
int ret = io_channel_send_full(s->ioc, buf, len,
s->write_msgfds,
--
2.52.0
On Tue, Apr 07, 2026 at 01:05:51PM +0800, Cindy Lu wrote:
> Add the AF_PACKET inject write path for socket chardevs. When a socket
> backend is opened with af-packet-mode=inject, tcp_chr_write() no longer
> sends the redirector stream framing through QIOChannel. Instead it
> parses the existing 4-byte length header, accumulates one complete
> packet, and frame on the AF_PACKET fd.
>
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
> chardev/char-socket.c | 148 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 148 insertions(+)
>
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index c710fdb497..45d06fda8f 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -108,11 +108,159 @@ static void tcp_chr_accept(QIONetListener *listener,
> static int tcp_chr_read_poll(void *opaque);
> static void tcp_chr_disconnect_locked(Chardev *chr);
>
> +#define TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE 65536
> +
> +static bool tcp_chr_uses_af_packet_inject(SocketChardev *s)
> +{
> + return s->is_af_packet &&
> + s->af_packet_mode_set &&
> + s->af_packet_mode == CHARDEV_SOCKET_AF_PACKET_MODE_INJECT;
> +}
> +
> +static ssize_t tcp_chr_send_af_packet(SocketChardev *s,
> + const uint8_t *buf,
> + size_t len)
> +{
> +#ifdef CONFIG_LINUX
> + struct iovec iov = {
> + .iov_base = (void *)buf,
> + .iov_len = len,
> + };
> + struct msghdr msg = {
> + .msg_iov = &iov,
> + .msg_iovlen = 1,
> + };
> + ssize_t ret;
> +
> + if (!s->sioc || s->sioc->localAddr.ss_family != AF_PACKET) {
> + errno = ENOTSOCK;
> + return -1;
> + }
> +
> + do {
> + ret = sendmsg(s->sioc->fd, &msg, 0);
> + } while (ret < 0 && errno == EINTR);
> +
> + return ret;
> +#else
> + errno = EPROTONOSUPPORT;
> + return -1;
> +#endif
> +}
> +
> +static bool tcp_chr_af_packet_prepare_send(SocketChardev *s, uint32_t frame_len)
> +{
> + if (frame_len > TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE) {
> + errno = EMSGSIZE;
> + return false;
> + }
> +
> + if (frame_len == 0) {
> + s->af_packet_send_len = 0;
> + s->af_packet_send_offset = 0;
> + s->af_packet_send_len_bytes = 0;
> + return true;
> + }
> +
> + if (s->af_packet_send_buf_size < frame_len) {
> + s->af_packet_send_buf = g_realloc(s->af_packet_send_buf, frame_len);
> + s->af_packet_send_buf_size = frame_len;
> + }
> +
> + s->af_packet_send_len = frame_len;
> + s->af_packet_send_offset = 0;
> + s->af_packet_send_len_bytes = sizeof(s->af_packet_send_len_buf);
> + return true;
> +}
> +
> +static int tcp_chr_inject_af_packet(Chardev *chr,
> + SocketChardev *s,
> + const uint8_t *buf,
> + int len)
> +{
> + size_t offset = 0;
> + uint32_t frame_len_be;
> +
> + while (offset < len) {
> + size_t copy;
> +
> + if (s->af_packet_send_len_bytes < sizeof(s->af_packet_send_len_buf)) {
> + copy = MIN(sizeof(s->af_packet_send_len_buf) -
> + s->af_packet_send_len_bytes,
> + (size_t)len - offset);
> + memcpy(s->af_packet_send_len_buf + s->af_packet_send_len_bytes,
> + buf + offset, copy);
> + s->af_packet_send_len_bytes += copy;
> + offset += copy;
> +
> + if (s->af_packet_send_len_bytes <
> + sizeof(s->af_packet_send_len_buf)) {
> + continue;
> + }
> +
> + memcpy(&frame_len_be, s->af_packet_send_len_buf,
> + sizeof(frame_len_be));
> + if (!tcp_chr_af_packet_prepare_send(s, ntohl(frame_len_be))) {
> + return -1;
> + }
> + if (s->af_packet_send_len == 0) {
> + continue;
> + }
> + }
> +
> + copy = MIN(s->af_packet_send_len - s->af_packet_send_offset,
> + (size_t)len - offset);
> + memcpy(s->af_packet_send_buf + s->af_packet_send_offset,
> + buf + offset, copy);
> + s->af_packet_send_offset += copy;
> + offset += copy;
> +
> + if (s->af_packet_send_offset == s->af_packet_send_len) {
> + ssize_t ret;
> +
> + ret = tcp_chr_send_af_packet(s, s->af_packet_send_buf,
> + s->af_packet_send_len);
> +
> + if (ret < 0) {
> + if (errno == EAGAIN || errno == EWOULDBLOCK) {
> + return -1;
> + }
> + if (tcp_chr_read_poll(chr) <= 0) {
> + trace_chr_socket_poll_err(chr, chr->label);
> + tcp_chr_disconnect_locked(chr);
> + }
> + return -1;
> + }
> +
> + if (ret != (ssize_t)s->af_packet_send_len) {
> + if (ret >= 0) {
> + errno = EIO;
> + }
> + if (tcp_chr_read_poll(chr) <= 0) {
> + trace_chr_socket_poll_err(chr, chr->label);
> + tcp_chr_disconnect_locked(chr);
> + }
> + return -1;
> + }
> +
> + s->af_packet_send_len = 0;
> + s->af_packet_send_offset = 0;
> + s->af_packet_send_len_bytes = 0;
> + }
> + }
> +
> + return len;
> +}
> +
> /* Called with chr_write_lock held. */
> static int tcp_chr_write(Chardev *chr, const uint8_t *buf, int len)
> {
> SocketChardev *s = SOCKET_CHARDEV(chr);
>
> + if (tcp_chr_uses_af_packet_inject(s)) {
> + return tcp_chr_inject_af_packet(chr, s, buf, len);
> + }
> +
This code is pretty unpleasant, completely bypassing all of the
normal I/O path in the chardev, and completely ignoring the
QIOChannel too, just poking the socket directly. Essentially
this shares nothing in common with the socket chardev functionality.
If we do want to have AF_PACKET support in the socket chardev then
IMHO all this buffer parsing code needs to be in the netfilter
layer instead. The chardev should just accept a single packet
buffer at a time, such that it can directly pass it on to the
normal qio_channel_write API which will call sendmsg.
> if (s->state == TCP_CHARDEV_STATE_CONNECTED) {
> int ret = io_channel_send_full(s->ioc, buf, len,
> s->write_msgfds,
> --
> 2.52.0
>
>
With regards,
Daniel
--
|: https://berrange.com ~~ https://hachyderm.io/@berrange :|
|: https://libvirt.org ~~ https://entangle-photo.org :|
|: https://pixelfed.art/berrange ~~ https://fstop138.berrange.com :|
© 2016 - 2026 Red Hat, Inc.