From: Bobby Eshleman <bobby.eshleman@bytedance.com>
This commit adds the common datagram receive functionality for virtio
transports. It does not add the vhost/virtio users of that
functionality.
This functionality includes:
- changes to the virtio_transport_recv_pkt() path for finding the
bound socket receiver for incoming packets
- virtio_transport_recv_pkt() saves the source cid and port to the
control buffer for recvmsg() to initialize sockaddr_vm structure
when using datagram
Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
Signed-off-by: Amery Hung <amery.hung@bytedance.com>
---
net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++----
1 file changed, 66 insertions(+), 13 deletions(-)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 46cd1807f8e3..a571b575fde9 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
static u16 virtio_transport_get_type(struct sock *sk)
{
- if (sk->sk_type == SOCK_STREAM)
+ if (sk->sk_type == SOCK_DGRAM)
+ return VIRTIO_VSOCK_TYPE_DGRAM;
+ else if (sk->sk_type == SOCK_STREAM)
return VIRTIO_VSOCK_TYPE_STREAM;
else
return VIRTIO_VSOCK_TYPE_SEQPACKET;
@@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
kfree_skb(skb);
}
+static void
+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err)
+{
+ if (err == -ENOMEM)
+ kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF);
+ else if (err == -ENOBUFS)
+ kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM);
+ else
+ kfree_skb(skb);
+}
+
+/* This function takes ownership of the skb.
+ *
+ * It either places the skb on the sk_receive_queue or frees it.
+ */
+static void
+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb)
+{
+ int err;
+
+ err = sock_queue_rcv_skb(sk, skb);
+ if (err) {
+ virtio_transport_dgram_kfree_skb(skb, err);
+ return;
+ }
+}
+
static int
virtio_transport_recv_connected(struct sock *sk,
struct sk_buff *skb)
@@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
static bool virtio_transport_valid_type(u16 type)
{
return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
- (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
+ (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
+ (type == VIRTIO_VSOCK_TYPE_DGRAM);
}
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
@@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
struct sk_buff *skb)
{
struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
+ struct vsock_skb_cb *vsock_cb;
struct sockaddr_vm src, dst;
struct vsock_sock *vsk;
struct sock *sk;
bool space_available;
+ u16 type;
vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
le32_to_cpu(hdr->src_port));
vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
le32_to_cpu(hdr->dst_port));
+ type = le16_to_cpu(hdr->type);
+
trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
dst.svm_cid, dst.svm_port,
le32_to_cpu(hdr->len),
- le16_to_cpu(hdr->type),
+ type,
le16_to_cpu(hdr->op),
le32_to_cpu(hdr->flags),
le32_to_cpu(hdr->buf_alloc),
le32_to_cpu(hdr->fwd_cnt));
- if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
+ if (!virtio_transport_valid_type(type)) {
(void)virtio_transport_reset_no_sock(t, skb);
goto free_pkt;
}
- /* The socket must be in connected or bound table
- * otherwise send reset back
+ /* For stream/seqpacket, the socket must be in connected or bound table
+ * otherwise send reset back.
+ *
+ * For datagrams, no reset is sent back.
*/
sk = vsock_find_connected_socket(&src, &dst);
if (!sk) {
- sk = vsock_find_bound_socket(&dst);
- if (!sk) {
- (void)virtio_transport_reset_no_sock(t, skb);
- goto free_pkt;
+ if (type == VIRTIO_VSOCK_TYPE_DGRAM) {
+ sk = vsock_find_bound_dgram_socket(&dst);
+ if (!sk)
+ goto free_pkt;
+ } else {
+ sk = vsock_find_bound_socket(&dst);
+ if (!sk) {
+ (void)virtio_transport_reset_no_sock(t, skb);
+ goto free_pkt;
+ }
}
}
- if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
- (void)virtio_transport_reset_no_sock(t, skb);
+ if (virtio_transport_get_type(sk) != type) {
+ if (type != VIRTIO_VSOCK_TYPE_DGRAM)
+ (void)virtio_transport_reset_no_sock(t, skb);
sock_put(sk);
goto free_pkt;
}
@@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
/* Check if sk has been closed before lock_sock */
if (sock_flag(sk, SOCK_DONE)) {
- (void)virtio_transport_reset_no_sock(t, skb);
+ if (type != VIRTIO_VSOCK_TYPE_DGRAM)
+ (void)virtio_transport_reset_no_sock(t, skb);
release_sock(sk);
sock_put(sk);
goto free_pkt;
}
+ if (sk->sk_type == SOCK_DGRAM) {
+ vsock_cb = vsock_skb_cb(skb);
+ vsock_cb->src_cid = src.svm_cid;
+ vsock_cb->src_port = src.svm_port;
+ virtio_transport_recv_dgram(sk, skb);
+ goto out;
+ }
+
space_available = virtio_transport_space_update(sk, skb);
/* Update CID in case it has changed after a transport reset event */
@@ -1691,6 +1743,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
break;
}
+out:
release_sock(sk);
/* Release refcnt obtained when we fetched this socket out of the
--
2.20.1
On Wed, Jul 10, 2024 at 09:25:50PM GMT, Amery Hung wrote:
>From: Bobby Eshleman <bobby.eshleman@bytedance.com>
>
>This commit adds the common datagram receive functionality for virtio
>transports. It does not add the vhost/virtio users of that
>functionality.
>
>This functionality includes:
>- changes to the virtio_transport_recv_pkt() path for finding the
> bound socket receiver for incoming packets
>- virtio_transport_recv_pkt() saves the source cid and port to the
> control buffer for recvmsg() to initialize sockaddr_vm structure
> when using datagram
>
>Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
>Signed-off-by: Amery Hung <amery.hung@bytedance.com>
>---
> net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++----
> 1 file changed, 66 insertions(+), 13 deletions(-)
>
>diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>index 46cd1807f8e3..a571b575fde9 100644
>--- a/net/vmw_vsock/virtio_transport_common.c
>+++ b/net/vmw_vsock/virtio_transport_common.c
>@@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>
> static u16 virtio_transport_get_type(struct sock *sk)
> {
>- if (sk->sk_type == SOCK_STREAM)
>+ if (sk->sk_type == SOCK_DGRAM)
>+ return VIRTIO_VSOCK_TYPE_DGRAM;
>+ else if (sk->sk_type == SOCK_STREAM)
> return VIRTIO_VSOCK_TYPE_STREAM;
> else
> return VIRTIO_VSOCK_TYPE_SEQPACKET;
>@@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
> kfree_skb(skb);
> }
>
>+static void
>+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err)
>+{
>+ if (err == -ENOMEM)
>+ kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF);
>+ else if (err == -ENOBUFS)
>+ kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM);
>+ else
>+ kfree_skb(skb);
>+}
>+
>+/* This function takes ownership of the skb.
>+ *
>+ * It either places the skb on the sk_receive_queue or frees it.
>+ */
>+static void
>+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb)
>+{
>+ int err;
>+
>+ err = sock_queue_rcv_skb(sk, skb);
>+ if (err) {
>+ virtio_transport_dgram_kfree_skb(skb, err);
>+ return;
>+ }
>+}
>+
> static int
> virtio_transport_recv_connected(struct sock *sk,
> struct sk_buff *skb)
>@@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> static bool virtio_transport_valid_type(u16 type)
> {
> return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
>- (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
>+ (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
>+ (type == VIRTIO_VSOCK_TYPE_DGRAM);
> }
>
> /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
>@@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> struct sk_buff *skb)
> {
> struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
>+ struct vsock_skb_cb *vsock_cb;
This can be defined in the block where it's used.
> struct sockaddr_vm src, dst;
> struct vsock_sock *vsk;
> struct sock *sk;
> bool space_available;
>+ u16 type;
>
> vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
> le32_to_cpu(hdr->src_port));
> vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
> le32_to_cpu(hdr->dst_port));
>
>+ type = le16_to_cpu(hdr->type);
>+
> trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
> dst.svm_cid, dst.svm_port,
> le32_to_cpu(hdr->len),
>- le16_to_cpu(hdr->type),
>+ type,
> le16_to_cpu(hdr->op),
> le32_to_cpu(hdr->flags),
> le32_to_cpu(hdr->buf_alloc),
> le32_to_cpu(hdr->fwd_cnt));
>
>- if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
>+ if (!virtio_transport_valid_type(type)) {
> (void)virtio_transport_reset_no_sock(t, skb);
> goto free_pkt;
> }
>
>- /* The socket must be in connected or bound table
>- * otherwise send reset back
>+ /* For stream/seqpacket, the socket must be in connected or bound table
>+ * otherwise send reset back.
>+ *
>+ * For datagrams, no reset is sent back.
> */
> sk = vsock_find_connected_socket(&src, &dst);
> if (!sk) {
>- sk = vsock_find_bound_socket(&dst);
>- if (!sk) {
>- (void)virtio_transport_reset_no_sock(t, skb);
>- goto free_pkt;
>+ if (type == VIRTIO_VSOCK_TYPE_DGRAM) {
>+ sk = vsock_find_bound_dgram_socket(&dst);
>+ if (!sk)
>+ goto free_pkt;
>+ } else {
>+ sk = vsock_find_bound_socket(&dst);
>+ if (!sk) {
>+ (void)virtio_transport_reset_no_sock(t, skb);
>+ goto free_pkt;
>+ }
> }
> }
>
>- if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
>- (void)virtio_transport_reset_no_sock(t, skb);
>+ if (virtio_transport_get_type(sk) != type) {
>+ if (type != VIRTIO_VSOCK_TYPE_DGRAM)
>+ (void)virtio_transport_reset_no_sock(t, skb);
> sock_put(sk);
> goto free_pkt;
> }
>@@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>
> /* Check if sk has been closed before lock_sock */
> if (sock_flag(sk, SOCK_DONE)) {
>- (void)virtio_transport_reset_no_sock(t, skb);
>+ if (type != VIRTIO_VSOCK_TYPE_DGRAM)
>+ (void)virtio_transport_reset_no_sock(t, skb);
> release_sock(sk);
> sock_put(sk);
> goto free_pkt;
> }
>
>+ if (sk->sk_type == SOCK_DGRAM) {
>+ vsock_cb = vsock_skb_cb(skb);
>+ vsock_cb->src_cid = src.svm_cid;
>+ vsock_cb->src_port = src.svm_port;
>+ virtio_transport_recv_dgram(sk, skb);
What about adding an API that transports can use to hide this?
I mean something that hide vsock_cb creation and queue packet in the
socket receive queue. I'd also not expose vsock_skb_cb in an header, but
I'd handle it internally in af_vsock.c. So I'd just expose API to
queue/dequeue them.
Also why VMCI is using sk_receive_skb(), while we are using
sock_queue_rcv_skb()?
Thanks,
Stefano
>+ goto out;
>+ }
>+
> space_available = virtio_transport_space_update(sk, skb);
>
> /* Update CID in case it has changed after a transport reset event */
>@@ -1691,6 +1743,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> break;
> }
>
>+out:
> release_sock(sk);
>
> /* Release refcnt obtained when we fetched this socket out of the
>--
>2.20.1
>
On Tue, Jul 23, 2024 at 7:42 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Wed, Jul 10, 2024 at 09:25:50PM GMT, Amery Hung wrote:
> >From: Bobby Eshleman <bobby.eshleman@bytedance.com>
> >
> >This commit adds the common datagram receive functionality for virtio
> >transports. It does not add the vhost/virtio users of that
> >functionality.
> >
> >This functionality includes:
> >- changes to the virtio_transport_recv_pkt() path for finding the
> > bound socket receiver for incoming packets
> >- virtio_transport_recv_pkt() saves the source cid and port to the
> > control buffer for recvmsg() to initialize sockaddr_vm structure
> > when using datagram
> >
> >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> >Signed-off-by: Amery Hung <amery.hung@bytedance.com>
> >---
> > net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++----
> > 1 file changed, 66 insertions(+), 13 deletions(-)
> >
> >diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> >index 46cd1807f8e3..a571b575fde9 100644
> >--- a/net/vmw_vsock/virtio_transport_common.c
> >+++ b/net/vmw_vsock/virtio_transport_common.c
> >@@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> >
> > static u16 virtio_transport_get_type(struct sock *sk)
> > {
> >- if (sk->sk_type == SOCK_STREAM)
> >+ if (sk->sk_type == SOCK_DGRAM)
> >+ return VIRTIO_VSOCK_TYPE_DGRAM;
> >+ else if (sk->sk_type == SOCK_STREAM)
> > return VIRTIO_VSOCK_TYPE_STREAM;
> > else
> > return VIRTIO_VSOCK_TYPE_SEQPACKET;
> >@@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
> > kfree_skb(skb);
> > }
> >
> >+static void
> >+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err)
> >+{
> >+ if (err == -ENOMEM)
> >+ kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF);
> >+ else if (err == -ENOBUFS)
> >+ kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM);
> >+ else
> >+ kfree_skb(skb);
> >+}
> >+
> >+/* This function takes ownership of the skb.
> >+ *
> >+ * It either places the skb on the sk_receive_queue or frees it.
> >+ */
> >+static void
> >+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb)
> >+{
> >+ int err;
> >+
> >+ err = sock_queue_rcv_skb(sk, skb);
> >+ if (err) {
> >+ virtio_transport_dgram_kfree_skb(skb, err);
> >+ return;
> >+ }
> >+}
> >+
> > static int
> > virtio_transport_recv_connected(struct sock *sk,
> > struct sk_buff *skb)
> >@@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> > static bool virtio_transport_valid_type(u16 type)
> > {
> > return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> >- (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> >+ (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> >+ (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > }
> >
> > /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
> >@@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> > struct sk_buff *skb)
> > {
> > struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
> >+ struct vsock_skb_cb *vsock_cb;
>
> This can be defined in the block where it's used.
>
Got it.
> > struct sockaddr_vm src, dst;
> > struct vsock_sock *vsk;
> > struct sock *sk;
> > bool space_available;
> >+ u16 type;
> >
> > vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
> > le32_to_cpu(hdr->src_port));
> > vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
> > le32_to_cpu(hdr->dst_port));
> >
> >+ type = le16_to_cpu(hdr->type);
> >+
> > trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
> > dst.svm_cid, dst.svm_port,
> > le32_to_cpu(hdr->len),
> >- le16_to_cpu(hdr->type),
> >+ type,
> > le16_to_cpu(hdr->op),
> > le32_to_cpu(hdr->flags),
> > le32_to_cpu(hdr->buf_alloc),
> > le32_to_cpu(hdr->fwd_cnt));
> >
> >- if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
> >+ if (!virtio_transport_valid_type(type)) {
> > (void)virtio_transport_reset_no_sock(t, skb);
> > goto free_pkt;
> > }
> >
> >- /* The socket must be in connected or bound table
> >- * otherwise send reset back
> >+ /* For stream/seqpacket, the socket must be in connected or bound table
> >+ * otherwise send reset back.
> >+ *
> >+ * For datagrams, no reset is sent back.
> > */
> > sk = vsock_find_connected_socket(&src, &dst);
> > if (!sk) {
> >- sk = vsock_find_bound_socket(&dst);
> >- if (!sk) {
> >- (void)virtio_transport_reset_no_sock(t, skb);
> >- goto free_pkt;
> >+ if (type == VIRTIO_VSOCK_TYPE_DGRAM) {
> >+ sk = vsock_find_bound_dgram_socket(&dst);
> >+ if (!sk)
> >+ goto free_pkt;
> >+ } else {
> >+ sk = vsock_find_bound_socket(&dst);
> >+ if (!sk) {
> >+ (void)virtio_transport_reset_no_sock(t, skb);
> >+ goto free_pkt;
> >+ }
> > }
> > }
> >
> >- if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
> >- (void)virtio_transport_reset_no_sock(t, skb);
> >+ if (virtio_transport_get_type(sk) != type) {
> >+ if (type != VIRTIO_VSOCK_TYPE_DGRAM)
> >+ (void)virtio_transport_reset_no_sock(t, skb);
> > sock_put(sk);
> > goto free_pkt;
> > }
> >@@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >
> > /* Check if sk has been closed before lock_sock */
> > if (sock_flag(sk, SOCK_DONE)) {
> >- (void)virtio_transport_reset_no_sock(t, skb);
> >+ if (type != VIRTIO_VSOCK_TYPE_DGRAM)
> >+ (void)virtio_transport_reset_no_sock(t, skb);
> > release_sock(sk);
> > sock_put(sk);
> > goto free_pkt;
> > }
> >
> >+ if (sk->sk_type == SOCK_DGRAM) {
> >+ vsock_cb = vsock_skb_cb(skb);
> >+ vsock_cb->src_cid = src.svm_cid;
> >+ vsock_cb->src_port = src.svm_port;
> >+ virtio_transport_recv_dgram(sk, skb);
>
>
> What about adding an API that transports can use to hide this?
>
> I mean something that hide vsock_cb creation and queue packet in the
> socket receive queue. I'd also not expose vsock_skb_cb in an header, but
> I'd handle it internally in af_vsock.c. So I'd just expose API to
> queue/dequeue them.
>
Got it. I will move vsock_skb_cb to af_vsock.c and create an API:
vsock_dgram_skb_save_src_addr(struct sk_buff *skb, u32 cid, u32 port)
Different dgram implementations will call this API instead of the code
block above to save the source address information into the control
buffer.
A side note on why this is a vsock API instead of a member function in
transport: As we move to support multi-transport dgram, different
transport implementations can place skb into the sk->sk_receive_queue.
Therefore, we cannot call transport-specific function in
vsock_dgram_recvmsg() to initialize struct sockaddr_vm. Hence, the
receiving paths of different transports need to call this API to save
source address.
> Also why VMCI is using sk_receive_skb(), while we are using
> sock_queue_rcv_skb()?
>
I _think_ originally we referred to UDP and UDS when designing virtio
dgram, and ended up with placing skb into sk_receive_queue directly. I
will look into this to provide better justification.
Thank you,
Amery
> Thanks,
> Stefano
>
> >+ goto out;
> >+ }
> >+
> > space_available = virtio_transport_space_update(sk, skb);
> >
> > /* Update CID in case it has changed after a transport reset event */
> >@@ -1691,6 +1743,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> > break;
> > }
> >
> >+out:
> > release_sock(sk);
> >
> > /* Release refcnt obtained when we fetched this socket out of the
> >--
> >2.20.1
> >
>
On Mon, Jul 29, 2024 at 05:35:01PM GMT, Amery Hung wrote:
>On Tue, Jul 23, 2024 at 7:42 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> On Wed, Jul 10, 2024 at 09:25:50PM GMT, Amery Hung wrote:
>> >From: Bobby Eshleman <bobby.eshleman@bytedance.com>
>> >
>> >This commit adds the common datagram receive functionality for virtio
>> >transports. It does not add the vhost/virtio users of that
>> >functionality.
>> >
>> >This functionality includes:
>> >- changes to the virtio_transport_recv_pkt() path for finding the
>> > bound socket receiver for incoming packets
>> >- virtio_transport_recv_pkt() saves the source cid and port to the
>> > control buffer for recvmsg() to initialize sockaddr_vm structure
>> > when using datagram
>> >
>> >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
>> >Signed-off-by: Amery Hung <amery.hung@bytedance.com>
>> >---
>> > net/vmw_vsock/virtio_transport_common.c | 79 +++++++++++++++++++++----
>> > 1 file changed, 66 insertions(+), 13 deletions(-)
>> >
>> >diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>> >index 46cd1807f8e3..a571b575fde9 100644
>> >--- a/net/vmw_vsock/virtio_transport_common.c
>> >+++ b/net/vmw_vsock/virtio_transport_common.c
>> >@@ -235,7 +235,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>> >
>> > static u16 virtio_transport_get_type(struct sock *sk)
>> > {
>> >- if (sk->sk_type == SOCK_STREAM)
>> >+ if (sk->sk_type == SOCK_DGRAM)
>> >+ return VIRTIO_VSOCK_TYPE_DGRAM;
>> >+ else if (sk->sk_type == SOCK_STREAM)
>> > return VIRTIO_VSOCK_TYPE_STREAM;
>> > else
>> > return VIRTIO_VSOCK_TYPE_SEQPACKET;
>> >@@ -1422,6 +1424,33 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
>> > kfree_skb(skb);
>> > }
>> >
>> >+static void
>> >+virtio_transport_dgram_kfree_skb(struct sk_buff *skb, int err)
>> >+{
>> >+ if (err == -ENOMEM)
>> >+ kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_RCVBUFF);
>> >+ else if (err == -ENOBUFS)
>> >+ kfree_skb_reason(skb, SKB_DROP_REASON_PROTO_MEM);
>> >+ else
>> >+ kfree_skb(skb);
>> >+}
>> >+
>> >+/* This function takes ownership of the skb.
>> >+ *
>> >+ * It either places the skb on the sk_receive_queue or frees it.
>> >+ */
>> >+static void
>> >+virtio_transport_recv_dgram(struct sock *sk, struct sk_buff *skb)
>> >+{
>> >+ int err;
>> >+
>> >+ err = sock_queue_rcv_skb(sk, skb);
>> >+ if (err) {
>> >+ virtio_transport_dgram_kfree_skb(skb, err);
>> >+ return;
>> >+ }
>> >+}
>> >+
>> > static int
>> > virtio_transport_recv_connected(struct sock *sk,
>> > struct sk_buff *skb)
>> >@@ -1591,7 +1620,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
>> > static bool virtio_transport_valid_type(u16 type)
>> > {
>> > return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
>> >- (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
>> >+ (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
>> >+ (type == VIRTIO_VSOCK_TYPE_DGRAM);
>> > }
>> >
>> > /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
>> >@@ -1601,44 +1631,57 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>> > struct sk_buff *skb)
>> > {
>> > struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
>> >+ struct vsock_skb_cb *vsock_cb;
>>
>> This can be defined in the block where it's used.
>>
>
>Got it.
>
>> > struct sockaddr_vm src, dst;
>> > struct vsock_sock *vsk;
>> > struct sock *sk;
>> > bool space_available;
>> >+ u16 type;
>> >
>> > vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
>> > le32_to_cpu(hdr->src_port));
>> > vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
>> > le32_to_cpu(hdr->dst_port));
>> >
>> >+ type = le16_to_cpu(hdr->type);
>> >+
>> > trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
>> > dst.svm_cid, dst.svm_port,
>> > le32_to_cpu(hdr->len),
>> >- le16_to_cpu(hdr->type),
>> >+ type,
>> > le16_to_cpu(hdr->op),
>> > le32_to_cpu(hdr->flags),
>> > le32_to_cpu(hdr->buf_alloc),
>> > le32_to_cpu(hdr->fwd_cnt));
>> >
>> >- if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
>> >+ if (!virtio_transport_valid_type(type)) {
>> > (void)virtio_transport_reset_no_sock(t, skb);
>> > goto free_pkt;
>> > }
>> >
>> >- /* The socket must be in connected or bound table
>> >- * otherwise send reset back
>> >+ /* For stream/seqpacket, the socket must be in connected or bound table
>> >+ * otherwise send reset back.
>> >+ *
>> >+ * For datagrams, no reset is sent back.
>> > */
>> > sk = vsock_find_connected_socket(&src, &dst);
>> > if (!sk) {
>> >- sk = vsock_find_bound_socket(&dst);
>> >- if (!sk) {
>> >- (void)virtio_transport_reset_no_sock(t, skb);
>> >- goto free_pkt;
>> >+ if (type == VIRTIO_VSOCK_TYPE_DGRAM) {
>> >+ sk = vsock_find_bound_dgram_socket(&dst);
>> >+ if (!sk)
>> >+ goto free_pkt;
>> >+ } else {
>> >+ sk = vsock_find_bound_socket(&dst);
>> >+ if (!sk) {
>> >+ (void)virtio_transport_reset_no_sock(t, skb);
>> >+ goto free_pkt;
>> >+ }
>> > }
>> > }
>> >
>> >- if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
>> >- (void)virtio_transport_reset_no_sock(t, skb);
>> >+ if (virtio_transport_get_type(sk) != type) {
>> >+ if (type != VIRTIO_VSOCK_TYPE_DGRAM)
>> >+ (void)virtio_transport_reset_no_sock(t, skb);
>> > sock_put(sk);
>> > goto free_pkt;
>> > }
>> >@@ -1654,12 +1697,21 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>> >
>> > /* Check if sk has been closed before lock_sock */
>> > if (sock_flag(sk, SOCK_DONE)) {
>> >- (void)virtio_transport_reset_no_sock(t, skb);
>> >+ if (type != VIRTIO_VSOCK_TYPE_DGRAM)
>> >+ (void)virtio_transport_reset_no_sock(t, skb);
>> > release_sock(sk);
>> > sock_put(sk);
>> > goto free_pkt;
>> > }
>> >
>> >+ if (sk->sk_type == SOCK_DGRAM) {
>> >+ vsock_cb = vsock_skb_cb(skb);
>> >+ vsock_cb->src_cid = src.svm_cid;
>> >+ vsock_cb->src_port = src.svm_port;
>> >+ virtio_transport_recv_dgram(sk, skb);
>>
>>
>> What about adding an API that transports can use to hide this?
>>
>> I mean something that hide vsock_cb creation and queue packet in the
>> socket receive queue. I'd also not expose vsock_skb_cb in an header, but
>> I'd handle it internally in af_vsock.c. So I'd just expose API to
>> queue/dequeue them.
>>
>
>Got it. I will move vsock_skb_cb to af_vsock.c and create an API:
>
>vsock_dgram_skb_save_src_addr(struct sk_buff *skb, u32 cid, u32 port)
This is okay, but I would try to go further by directly adding an API to
queue dgrams in af_vsock.c (if it's feasible).
>
>Different dgram implementations will call this API instead of the code
>block above to save the source address information into the control
>buffer.
>
>A side note on why this is a vsock API instead of a member function in
>transport: As we move to support multi-transport dgram, different
>transport implementations can place skb into the sk->sk_receive_queue.
>Therefore, we cannot call transport-specific function in
>vsock_dgram_recvmsg() to initialize struct sockaddr_vm. Hence, the
>receiving paths of different transports need to call this API to save
>source address.
What I meant is: why can't virtio_transport_recv_dgram() be exposed by
af_vsock.c as vsock_recv_dgram() and handle everything internally, like
populating vsock_cb, calling sock_queue_rcv_skb(), etc.?
>
>> Also why VMCI is using sk_receive_skb(), while we are using
>> sock_queue_rcv_skb()?
>>
>
>I _think_ originally we referred to UDP and UDS when designing virtio
>dgram, and ended up with placing skb into sk_receive_queue directly. I
>will look into this to provide better justification.
Great, thanks.
Maybe we can also ping the VMCI maintainers to understand if they can switch
to sock_queue_rcv_skb(). But we should better understand the difference.
Thanks,
Stefano
© 2016 - 2025 Red Hat, Inc.