This patch adds datagram (SOCK_DGRAM) support to the virtio vsock transport and to the vhost side.
Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
---
drivers/vhost/vsock.c | 2 +-
include/net/af_vsock.h | 2 +
include/uapi/linux/virtio_vsock.h | 1 +
net/vmw_vsock/af_vsock.c | 26 +++-
net/vmw_vsock/virtio_transport.c | 2 +-
net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
6 files changed, 186 insertions(+), 20 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index a5d1bdb786fe..3dc72a5647ca 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
int ret;
ret = vsock_core_register(&vhost_transport.transport,
- VSOCK_TRANSPORT_F_H2G);
+ VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
if (ret < 0)
return ret;
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 1c53c4c4d88f..37e55c81e4df 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -78,6 +78,8 @@ struct vsock_sock {
s64 vsock_stream_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_space(struct vsock_sock *vsk);
struct sock *vsock_create_connected(struct sock *parent);
+int vsock_bind_stream(struct vsock_sock *vsk,
+ struct sockaddr_vm *addr);
/**** TRANSPORT ****/
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 857df3a3a70d..0975b9c88292 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
enum virtio_vsock_type {
VIRTIO_VSOCK_TYPE_STREAM = 1,
VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
+ VIRTIO_VSOCK_TYPE_DGRAM = 3,
};
enum virtio_vsock_op {
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 1893f8aafa48..87e4ae1866d3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
return 0;
}
+int vsock_bind_stream(struct vsock_sock *vsk,
+ struct sockaddr_vm *addr)
+{
+ int retval;
+
+ spin_lock_bh(&vsock_table_lock);
+ retval = __vsock_bind_connectible(vsk, addr);
+ spin_unlock_bh(&vsock_table_lock);
+
+ return retval;
+}
+EXPORT_SYMBOL(vsock_bind_stream);
+
static int __vsock_bind_dgram(struct vsock_sock *vsk,
struct sockaddr_vm *addr)
{
@@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
}
if (features & VSOCK_TRANSPORT_F_DGRAM) {
- if (t_dgram) {
- err = -EBUSY;
- goto err_busy;
+ /* TODO: always choose the G2H variant over others, support nesting later */
+ if (features & VSOCK_TRANSPORT_F_G2H) {
+ if (t_dgram)
+ pr_warn("virtio_vsock: t_dgram already set\n");
+ t_dgram = t;
+ }
+
+ if (!t_dgram) {
+ t_dgram = t;
}
- t_dgram = t;
}
if (features & VSOCK_TRANSPORT_F_LOCAL) {
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 073314312683..d4526ca462d2 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
return -ENOMEM;
ret = vsock_core_register(&virtio_transport.transport,
- VSOCK_TRANSPORT_F_G2H);
+ VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
if (ret)
goto out_wq;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index bdf16fff054f..aedb48728677 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
static u16 virtio_transport_get_type(struct sock *sk)
{
- if (sk->sk_type == SOCK_STREAM)
+ if (sk->sk_type == SOCK_DGRAM)
+ return VIRTIO_VSOCK_TYPE_DGRAM;
+ else if (sk->sk_type == SOCK_STREAM)
return VIRTIO_VSOCK_TYPE_STREAM;
else
return VIRTIO_VSOCK_TYPE_SEQPACKET;
@@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
vvs = vsk->trans;
/* we can send less than pkt_len bytes */
- if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
- pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
+ if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+ if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
+ pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
+ else
+ return 0;
+ }
- /* virtio_transport_get_credit might return less than pkt_len credit */
- pkt_len = virtio_transport_get_credit(vvs, pkt_len);
+ if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
+ /* virtio_transport_get_credit might return less than pkt_len credit */
+ pkt_len = virtio_transport_get_credit(vvs, pkt_len);
- /* Do not send zero length OP_RW pkt */
- if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
- return pkt_len;
+ /* Do not send zero length OP_RW pkt */
+ if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
+ return pkt_len;
+ }
skb = virtio_transport_alloc_skb(info, pkt_len,
src_cid, src_port,
dst_cid, dst_port,
&err);
if (!skb) {
- virtio_transport_put_credit(vvs, pkt_len);
+ if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
+ virtio_transport_put_credit(vvs, pkt_len);
return err;
}
@@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
+static ssize_t
+virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
+ struct msghdr *msg, size_t len)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct sk_buff *skb;
+ size_t total = 0;
+ u32 free_space;
+ int err = -EFAULT;
+
+ spin_lock_bh(&vvs->rx_lock);
+ if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
+ skb = __skb_dequeue(&vvs->rx_queue);
+
+ total = len;
+ if (total > skb->len - vsock_metadata(skb)->off)
+ total = skb->len - vsock_metadata(skb)->off;
+ else if (total < skb->len - vsock_metadata(skb)->off)
+ msg->msg_flags |= MSG_TRUNC;
+
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ spin_unlock_bh(&vvs->rx_lock);
+
+ err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
+ if (err)
+ return err;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ virtio_transport_dec_rx_pkt(vvs, skb);
+ consume_skb(skb);
+ }
+
+ free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ if (total > 0 && msg->msg_name) {
+ /* Provide the address of the sender. */
+ DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
+
+ vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
+ le32_to_cpu(vsock_hdr(skb)->src_port));
+ msg->msg_namelen = sizeof(*vm_addr);
+ }
+ return total;
+}
+
+static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
+{
+ return virtio_transport_stream_has_data(vsk);
+}
+
int
virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
struct msghdr *msg,
@@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t len, int flags)
{
- return -EOPNOTSUPP;
+ struct sock *sk;
+ size_t err = 0;
+ long timeout;
+
+ DEFINE_WAIT(wait);
+
+ sk = &vsk->sk;
+ err = 0;
+
+ if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
+ return -EOPNOTSUPP;
+
+ lock_sock(sk);
+
+ if (!len)
+ goto out;
+
+ timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+ while (1) {
+ s64 ready;
+
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ ready = virtio_transport_dgram_has_data(vsk);
+
+ if (ready == 0) {
+ if (timeout == 0) {
+ err = -EAGAIN;
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ }
+
+ release_sock(sk);
+ timeout = schedule_timeout(timeout);
+ lock_sock(sk);
+
+ if (signal_pending(current)) {
+ err = sock_intr_errno(timeout);
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ } else if (timeout == 0) {
+ err = -EAGAIN;
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ }
+ } else {
+ finish_wait(sk_sleep(sk), &wait);
+
+ if (ready < 0) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
+ break;
+ }
+ }
+out:
+ release_sock(sk);
+ return err;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
@@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
int virtio_transport_dgram_bind(struct vsock_sock *vsk,
struct sockaddr_vm *addr)
{
- return -EOPNOTSUPP;
+ return vsock_bind_stream(vsk, addr);
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
bool virtio_transport_dgram_allow(u32 cid, u32 port)
{
- return false;
+ return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
@@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t dgram_len)
{
- return -EOPNOTSUPP;
+ struct virtio_vsock_pkt_info info = {
+ .op = VIRTIO_VSOCK_OP_RW,
+ .msg = msg,
+ .pkt_len = dgram_len,
+ .vsk = vsk,
+ .remote_cid = remote_addr->svm_cid,
+ .remote_port = remote_addr->svm_port,
+ };
+
+ return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
@@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
int err = 0;
+ if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
+ virtio_transport_recv_enqueue(vsk, skb);
+ sk->sk_data_ready(sk);
+ return err;
+ }
+
switch (le16_to_cpu(hdr->op)) {
case VIRTIO_VSOCK_OP_RW:
virtio_transport_recv_enqueue(vsk, skb);
@@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
static bool virtio_transport_valid_type(u16 type)
{
return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
- (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
+ (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
+ (type == VIRTIO_VSOCK_TYPE_DGRAM);
}
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
@@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
goto free_pkt;
}
+ if (sk->sk_type == SOCK_DGRAM) {
+ virtio_transport_recv_connected(sk, skb);
+ goto out;
+ }
+
space_available = virtio_transport_space_update(sk, skb);
/* Update CID in case it has changed after a transport reset event */
@@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
break;
}
+out:
release_sock(sk);
/* Release refcnt obtained when we fetched this socket out of the
--
2.35.1
CC'ing virtio-dev@lists.oasis-open.org
On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> This patch supports dgram in virtio and on the vhost side.
>
> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> ---
> drivers/vhost/vsock.c | 2 +-
> include/net/af_vsock.h | 2 +
> include/uapi/linux/virtio_vsock.h | 1 +
> net/vmw_vsock/af_vsock.c | 26 +++-
> net/vmw_vsock/virtio_transport.c | 2 +-
> net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
> 6 files changed, 186 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index a5d1bdb786fe..3dc72a5647ca 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> int ret;
>
> ret = vsock_core_register(&vhost_transport.transport,
> - VSOCK_TRANSPORT_F_H2G);
> + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
> if (ret < 0)
> return ret;
>
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> index 1c53c4c4d88f..37e55c81e4df 100644
> --- a/include/net/af_vsock.h
> +++ b/include/net/af_vsock.h
> @@ -78,6 +78,8 @@ struct vsock_sock {
> s64 vsock_stream_has_data(struct vsock_sock *vsk);
> s64 vsock_stream_has_space(struct vsock_sock *vsk);
> struct sock *vsock_create_connected(struct sock *parent);
> +int vsock_bind_stream(struct vsock_sock *vsk,
> + struct sockaddr_vm *addr);
>
> /**** TRANSPORT ****/
>
> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> index 857df3a3a70d..0975b9c88292 100644
> --- a/include/uapi/linux/virtio_vsock.h
> +++ b/include/uapi/linux/virtio_vsock.h
> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> enum virtio_vsock_type {
> VIRTIO_VSOCK_TYPE_STREAM = 1,
> VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> + VIRTIO_VSOCK_TYPE_DGRAM = 3,
> };
>
> enum virtio_vsock_op {
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index 1893f8aafa48..87e4ae1866d3 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
> return 0;
> }
>
> +int vsock_bind_stream(struct vsock_sock *vsk,
> + struct sockaddr_vm *addr)
> +{
> + int retval;
> +
> + spin_lock_bh(&vsock_table_lock);
> + retval = __vsock_bind_connectible(vsk, addr);
> + spin_unlock_bh(&vsock_table_lock);
> +
> + return retval;
> +}
> +EXPORT_SYMBOL(vsock_bind_stream);
> +
> static int __vsock_bind_dgram(struct vsock_sock *vsk,
> struct sockaddr_vm *addr)
> {
> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
> }
>
> if (features & VSOCK_TRANSPORT_F_DGRAM) {
> - if (t_dgram) {
> - err = -EBUSY;
> - goto err_busy;
> + /* TODO: always chose the G2H variant over others, support nesting later */
> + if (features & VSOCK_TRANSPORT_F_G2H) {
> + if (t_dgram)
> + pr_warn("virtio_vsock: t_dgram already set\n");
> + t_dgram = t;
> + }
> +
> + if (!t_dgram) {
> + t_dgram = t;
> }
> - t_dgram = t;
> }
>
> if (features & VSOCK_TRANSPORT_F_LOCAL) {
> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> index 073314312683..d4526ca462d2 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> return -ENOMEM;
>
> ret = vsock_core_register(&virtio_transport.transport,
> - VSOCK_TRANSPORT_F_G2H);
> + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
> if (ret)
> goto out_wq;
>
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index bdf16fff054f..aedb48728677 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>
> static u16 virtio_transport_get_type(struct sock *sk)
> {
> - if (sk->sk_type == SOCK_STREAM)
> + if (sk->sk_type == SOCK_DGRAM)
> + return VIRTIO_VSOCK_TYPE_DGRAM;
> + else if (sk->sk_type == SOCK_STREAM)
> return VIRTIO_VSOCK_TYPE_STREAM;
> else
> return VIRTIO_VSOCK_TYPE_SEQPACKET;
> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> vvs = vsk->trans;
>
> /* we can send less than pkt_len bytes */
> - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> + else
> + return 0;
> + }
>
> - /* virtio_transport_get_credit might return less than pkt_len credit */
> - pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> + /* virtio_transport_get_credit might return less than pkt_len credit */
> + pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>
> - /* Do not send zero length OP_RW pkt */
> - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> - return pkt_len;
> + /* Do not send zero length OP_RW pkt */
> + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> + return pkt_len;
> + }
>
> skb = virtio_transport_alloc_skb(info, pkt_len,
> src_cid, src_port,
> dst_cid, dst_port,
> &err);
> if (!skb) {
> - virtio_transport_put_credit(vvs, pkt_len);
> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> + virtio_transport_put_credit(vvs, pkt_len);
> return err;
> }
>
> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> }
> EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
>
> +static ssize_t
> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> + struct msghdr *msg, size_t len)
> +{
> + struct virtio_vsock_sock *vvs = vsk->trans;
> + struct sk_buff *skb;
> + size_t total = 0;
> + u32 free_space;
> + int err = -EFAULT;
> +
> + spin_lock_bh(&vvs->rx_lock);
> + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
> + skb = __skb_dequeue(&vvs->rx_queue);
> +
> + total = len;
> + if (total > skb->len - vsock_metadata(skb)->off)
> + total = skb->len - vsock_metadata(skb)->off;
> + else if (total < skb->len - vsock_metadata(skb)->off)
> + msg->msg_flags |= MSG_TRUNC;
> +
> + /* sk_lock is held by caller so no one else can dequeue.
> + * Unlock rx_lock since memcpy_to_msg() may sleep.
> + */
> + spin_unlock_bh(&vvs->rx_lock);
> +
> + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
> + if (err)
> + return err;
> +
> + spin_lock_bh(&vvs->rx_lock);
> +
> + virtio_transport_dec_rx_pkt(vvs, skb);
> + consume_skb(skb);
> + }
> +
> + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
> +
> + spin_unlock_bh(&vvs->rx_lock);
> +
> + if (total > 0 && msg->msg_name) {
> + /* Provide the address of the sender. */
> + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
> +
> + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
> + le32_to_cpu(vsock_hdr(skb)->src_port));
> + msg->msg_namelen = sizeof(*vm_addr);
> + }
> + return total;
> +}
> +
> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
> +{
> + return virtio_transport_stream_has_data(vsk);
> +}
> +
> int
> virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> struct msghdr *msg,
> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
> struct msghdr *msg,
> size_t len, int flags)
> {
> - return -EOPNOTSUPP;
> + struct sock *sk;
> + size_t err = 0;
> + long timeout;
> +
> + DEFINE_WAIT(wait);
> +
> + sk = &vsk->sk;
> + err = 0;
> +
> + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
> + return -EOPNOTSUPP;
> +
> + lock_sock(sk);
> +
> + if (!len)
> + goto out;
> +
> + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> +
> + while (1) {
> + s64 ready;
> +
> + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
> + ready = virtio_transport_dgram_has_data(vsk);
> +
> + if (ready == 0) {
> + if (timeout == 0) {
> + err = -EAGAIN;
> + finish_wait(sk_sleep(sk), &wait);
> + break;
> + }
> +
> + release_sock(sk);
> + timeout = schedule_timeout(timeout);
> + lock_sock(sk);
> +
> + if (signal_pending(current)) {
> + err = sock_intr_errno(timeout);
> + finish_wait(sk_sleep(sk), &wait);
> + break;
> + } else if (timeout == 0) {
> + err = -EAGAIN;
> + finish_wait(sk_sleep(sk), &wait);
> + break;
> + }
> + } else {
> + finish_wait(sk_sleep(sk), &wait);
> +
> + if (ready < 0) {
> + err = -ENOMEM;
> + goto out;
> + }
> +
> + err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
> + break;
> + }
> + }
> +out:
> + release_sock(sk);
> + return err;
> }
> EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
>
> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> struct sockaddr_vm *addr)
> {
> - return -EOPNOTSUPP;
> + return vsock_bind_stream(vsk, addr);
> }
> EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
>
> bool virtio_transport_dgram_allow(u32 cid, u32 port)
> {
> - return false;
> + return true;
> }
> EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
>
> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
> struct msghdr *msg,
> size_t dgram_len)
> {
> - return -EOPNOTSUPP;
> + struct virtio_vsock_pkt_info info = {
> + .op = VIRTIO_VSOCK_OP_RW,
> + .msg = msg,
> + .pkt_len = dgram_len,
> + .vsk = vsk,
> + .remote_cid = remote_addr->svm_cid,
> + .remote_port = remote_addr->svm_port,
> + };
> +
> + return virtio_transport_send_pkt_info(vsk, &info);
> }
> EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
>
> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
> struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> int err = 0;
>
> + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
> + virtio_transport_recv_enqueue(vsk, skb);
> + sk->sk_data_ready(sk);
> + return err;
> + }
> +
> switch (le16_to_cpu(hdr->op)) {
> case VIRTIO_VSOCK_OP_RW:
> virtio_transport_recv_enqueue(vsk, skb);
> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> static bool virtio_transport_valid_type(u16 type)
> {
> return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> - (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> + (type == VIRTIO_VSOCK_TYPE_DGRAM);
> }
>
> /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> goto free_pkt;
> }
>
> + if (sk->sk_type == SOCK_DGRAM) {
> + virtio_transport_recv_connected(sk, skb);
> + goto out;
> + }
> +
> space_available = virtio_transport_space_update(sk, skb);
>
> /* Update CID in case it has changed after a transport reset event */
> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> break;
> }
>
> +out:
> release_sock(sk);
>
> /* Release refcnt obtained when we fetched this socket out of the
> --
> 2.35.1
>
On 16.08.2022 05:32, Bobby Eshleman wrote:
> CC'ing virtio-dev@lists.oasis-open.org
>
> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
>> This patch supports dgram in virtio and on the vhost side.
Hello,
Sorry, I don't understand how this maintains message boundaries. Or is
that unnecessary for SOCK_DGRAM?
Thanks
>>
>> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
>> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
>> ---
>> drivers/vhost/vsock.c | 2 +-
>> include/net/af_vsock.h | 2 +
>> include/uapi/linux/virtio_vsock.h | 1 +
>> net/vmw_vsock/af_vsock.c | 26 +++-
>> net/vmw_vsock/virtio_transport.c | 2 +-
>> net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
>> 6 files changed, 186 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index a5d1bdb786fe..3dc72a5647ca 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
>> int ret;
>>
>> ret = vsock_core_register(&vhost_transport.transport,
>> - VSOCK_TRANSPORT_F_H2G);
>> + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
>> if (ret < 0)
>> return ret;
>>
>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>> index 1c53c4c4d88f..37e55c81e4df 100644
>> --- a/include/net/af_vsock.h
>> +++ b/include/net/af_vsock.h
>> @@ -78,6 +78,8 @@ struct vsock_sock {
>> s64 vsock_stream_has_data(struct vsock_sock *vsk);
>> s64 vsock_stream_has_space(struct vsock_sock *vsk);
>> struct sock *vsock_create_connected(struct sock *parent);
>> +int vsock_bind_stream(struct vsock_sock *vsk,
>> + struct sockaddr_vm *addr);
>>
>> /**** TRANSPORT ****/
>>
>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>> index 857df3a3a70d..0975b9c88292 100644
>> --- a/include/uapi/linux/virtio_vsock.h
>> +++ b/include/uapi/linux/virtio_vsock.h
>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
>> enum virtio_vsock_type {
>> VIRTIO_VSOCK_TYPE_STREAM = 1,
>> VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
>> + VIRTIO_VSOCK_TYPE_DGRAM = 3,
>> };
>>
>> enum virtio_vsock_op {
>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> index 1893f8aafa48..87e4ae1866d3 100644
>> --- a/net/vmw_vsock/af_vsock.c
>> +++ b/net/vmw_vsock/af_vsock.c
>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
>> return 0;
>> }
>>
>> +int vsock_bind_stream(struct vsock_sock *vsk,
>> + struct sockaddr_vm *addr)
>> +{
>> + int retval;
>> +
>> + spin_lock_bh(&vsock_table_lock);
>> + retval = __vsock_bind_connectible(vsk, addr);
>> + spin_unlock_bh(&vsock_table_lock);
>> +
>> + return retval;
>> +}
>> +EXPORT_SYMBOL(vsock_bind_stream);
>> +
>> static int __vsock_bind_dgram(struct vsock_sock *vsk,
>> struct sockaddr_vm *addr)
>> {
>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
>> }
>>
>> if (features & VSOCK_TRANSPORT_F_DGRAM) {
>> - if (t_dgram) {
>> - err = -EBUSY;
>> - goto err_busy;
>> + /* TODO: always chose the G2H variant over others, support nesting later */
>> + if (features & VSOCK_TRANSPORT_F_G2H) {
>> + if (t_dgram)
>> + pr_warn("virtio_vsock: t_dgram already set\n");
>> + t_dgram = t;
>> + }
>> +
>> + if (!t_dgram) {
>> + t_dgram = t;
>> }
>> - t_dgram = t;
>> }
>>
>> if (features & VSOCK_TRANSPORT_F_LOCAL) {
>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>> index 073314312683..d4526ca462d2 100644
>> --- a/net/vmw_vsock/virtio_transport.c
>> +++ b/net/vmw_vsock/virtio_transport.c
>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
>> return -ENOMEM;
>>
>> ret = vsock_core_register(&virtio_transport.transport,
>> - VSOCK_TRANSPORT_F_G2H);
>> + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
>> if (ret)
>> goto out_wq;
>>
>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>> index bdf16fff054f..aedb48728677 100644
>> --- a/net/vmw_vsock/virtio_transport_common.c
>> +++ b/net/vmw_vsock/virtio_transport_common.c
>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>>
>> static u16 virtio_transport_get_type(struct sock *sk)
>> {
>> - if (sk->sk_type == SOCK_STREAM)
>> + if (sk->sk_type == SOCK_DGRAM)
>> + return VIRTIO_VSOCK_TYPE_DGRAM;
>> + else if (sk->sk_type == SOCK_STREAM)
>> return VIRTIO_VSOCK_TYPE_STREAM;
>> else
>> return VIRTIO_VSOCK_TYPE_SEQPACKET;
>> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>> vvs = vsk->trans;
>>
>> /* we can send less than pkt_len bytes */
>> - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
>> - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>> + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>> + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>> + else
>> + return 0;
>> + }
>>
>> - /* virtio_transport_get_credit might return less than pkt_len credit */
>> - pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
>> + /* virtio_transport_get_credit might return less than pkt_len credit */
>> + pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>>
>> - /* Do not send zero length OP_RW pkt */
>> - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>> - return pkt_len;
>> + /* Do not send zero length OP_RW pkt */
>> + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>> + return pkt_len;
>> + }
>>
>> skb = virtio_transport_alloc_skb(info, pkt_len,
>> src_cid, src_port,
>> dst_cid, dst_port,
>> &err);
>> if (!skb) {
>> - virtio_transport_put_credit(vvs, pkt_len);
>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>> + virtio_transport_put_credit(vvs, pkt_len);
>> return err;
>> }
>>
>> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
>> }
>> EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
>>
>> +static ssize_t
>> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
>> + struct msghdr *msg, size_t len)
>> +{
>> + struct virtio_vsock_sock *vvs = vsk->trans;
>> + struct sk_buff *skb;
>> + size_t total = 0;
>> + u32 free_space;
>> + int err = -EFAULT;
>> +
>> + spin_lock_bh(&vvs->rx_lock);
>> + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
>> + skb = __skb_dequeue(&vvs->rx_queue);
>> +
>> + total = len;
>> + if (total > skb->len - vsock_metadata(skb)->off)
>> + total = skb->len - vsock_metadata(skb)->off;
>> + else if (total < skb->len - vsock_metadata(skb)->off)
>> + msg->msg_flags |= MSG_TRUNC;
>> +
>> + /* sk_lock is held by caller so no one else can dequeue.
>> + * Unlock rx_lock since memcpy_to_msg() may sleep.
>> + */
>> + spin_unlock_bh(&vvs->rx_lock);
>> +
>> + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
>> + if (err)
>> + return err;
>> +
>> + spin_lock_bh(&vvs->rx_lock);
>> +
>> + virtio_transport_dec_rx_pkt(vvs, skb);
>> + consume_skb(skb);
>> + }
>> +
>> + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
>> +
>> + spin_unlock_bh(&vvs->rx_lock);
>> +
>> + if (total > 0 && msg->msg_name) {
>> + /* Provide the address of the sender. */
>> + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
>> +
>> + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
>> + le32_to_cpu(vsock_hdr(skb)->src_port));
>> + msg->msg_namelen = sizeof(*vm_addr);
>> + }
>> + return total;
>> +}
>> +
>> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
>> +{
>> + return virtio_transport_stream_has_data(vsk);
>> +}
>> +
>> int
>> virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
>> struct msghdr *msg,
>> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
>> struct msghdr *msg,
>> size_t len, int flags)
>> {
>> - return -EOPNOTSUPP;
>> + struct sock *sk;
>> + size_t err = 0;
>> + long timeout;
>> +
>> + DEFINE_WAIT(wait);
>> +
>> + sk = &vsk->sk;
>> + err = 0;
>> +
>> + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
>> + return -EOPNOTSUPP;
>> +
>> + lock_sock(sk);
>> +
>> + if (!len)
>> + goto out;
>> +
>> + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>> +
>> + while (1) {
>> + s64 ready;
>> +
>> + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
>> + ready = virtio_transport_dgram_has_data(vsk);
>> +
>> + if (ready == 0) {
>> + if (timeout == 0) {
>> + err = -EAGAIN;
>> + finish_wait(sk_sleep(sk), &wait);
>> + break;
>> + }
>> +
>> + release_sock(sk);
>> + timeout = schedule_timeout(timeout);
>> + lock_sock(sk);
>> +
>> + if (signal_pending(current)) {
>> + err = sock_intr_errno(timeout);
>> + finish_wait(sk_sleep(sk), &wait);
>> + break;
>> + } else if (timeout == 0) {
>> + err = -EAGAIN;
>> + finish_wait(sk_sleep(sk), &wait);
>> + break;
>> + }
>> + } else {
>> + finish_wait(sk_sleep(sk), &wait);
>> +
>> + if (ready < 0) {
>> + err = -ENOMEM;
>> + goto out;
>> + }
>> +
>> + err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
>> + break;
>> + }
>> + }
>> +out:
>> + release_sock(sk);
>> + return err;
>> }
>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
>>
>> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
>> int virtio_transport_dgram_bind(struct vsock_sock *vsk,
>> struct sockaddr_vm *addr)
>> {
>> - return -EOPNOTSUPP;
>> + return vsock_bind_stream(vsk, addr);
>> }
>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
>>
>> bool virtio_transport_dgram_allow(u32 cid, u32 port)
>> {
>> - return false;
>> + return true;
>> }
>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
>>
>> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
>> struct msghdr *msg,
>> size_t dgram_len)
>> {
>> - return -EOPNOTSUPP;
>> + struct virtio_vsock_pkt_info info = {
>> + .op = VIRTIO_VSOCK_OP_RW,
>> + .msg = msg,
>> + .pkt_len = dgram_len,
>> + .vsk = vsk,
>> + .remote_cid = remote_addr->svm_cid,
>> + .remote_port = remote_addr->svm_port,
>> + };
>> +
>> + return virtio_transport_send_pkt_info(vsk, &info);
>> }
>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
>>
>> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
>> struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
>> int err = 0;
>>
>> + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
>> + virtio_transport_recv_enqueue(vsk, skb);
>> + sk->sk_data_ready(sk);
>> + return err;
>> + }
>> +
>> switch (le16_to_cpu(hdr->op)) {
>> case VIRTIO_VSOCK_OP_RW:
>> virtio_transport_recv_enqueue(vsk, skb);
>> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
>> static bool virtio_transport_valid_type(u16 type)
>> {
>> return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
>> - (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
>> + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
>> + (type == VIRTIO_VSOCK_TYPE_DGRAM);
>> }
>>
>> /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
>> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>> goto free_pkt;
>> }
>>
>> + if (sk->sk_type == SOCK_DGRAM) {
>> + virtio_transport_recv_connected(sk, skb);
>> + goto out;
>> + }
>> +
>> space_available = virtio_transport_space_update(sk, skb);
>>
>> /* Update CID in case it has changed after a transport reset event */
>> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>> break;
>> }
>>
>> +out:
>> release_sock(sk);
>>
>> /* Release refcnt obtained when we fetched this socket out of the
>> --
>> 2.35.1
>>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>
On Wed, Aug 17, 2022 at 05:01:00AM +0000, Arseniy Krasnov wrote:
> On 16.08.2022 05:32, Bobby Eshleman wrote:
> > CC'ing virtio-dev@lists.oasis-open.org
> >
> > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> >> This patch supports dgram in virtio and on the vhost side.
> Hello,
>
> sorry, i don't understand, how this maintains message boundaries? Or it
> is unnecessary for SOCK_DGRAM?
>
> Thanks
If I understand your question, the length is included in the header, so
receivers always know that header start + header length + payload length
marks the message boundary.
> >>
> >> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> >> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> >> ---
> >> drivers/vhost/vsock.c | 2 +-
> >> include/net/af_vsock.h | 2 +
> >> include/uapi/linux/virtio_vsock.h | 1 +
> >> net/vmw_vsock/af_vsock.c | 26 +++-
> >> net/vmw_vsock/virtio_transport.c | 2 +-
> >> net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
> >> 6 files changed, 186 insertions(+), 20 deletions(-)
> >>
> >> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> >> index a5d1bdb786fe..3dc72a5647ca 100644
> >> --- a/drivers/vhost/vsock.c
> >> +++ b/drivers/vhost/vsock.c
> >> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> >> int ret;
> >>
> >> ret = vsock_core_register(&vhost_transport.transport,
> >> - VSOCK_TRANSPORT_F_H2G);
> >> + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
> >> if (ret < 0)
> >> return ret;
> >>
> >> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> >> index 1c53c4c4d88f..37e55c81e4df 100644
> >> --- a/include/net/af_vsock.h
> >> +++ b/include/net/af_vsock.h
> >> @@ -78,6 +78,8 @@ struct vsock_sock {
> >> s64 vsock_stream_has_data(struct vsock_sock *vsk);
> >> s64 vsock_stream_has_space(struct vsock_sock *vsk);
> >> struct sock *vsock_create_connected(struct sock *parent);
> >> +int vsock_bind_stream(struct vsock_sock *vsk,
> >> + struct sockaddr_vm *addr);
> >>
> >> /**** TRANSPORT ****/
> >>
> >> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> >> index 857df3a3a70d..0975b9c88292 100644
> >> --- a/include/uapi/linux/virtio_vsock.h
> >> +++ b/include/uapi/linux/virtio_vsock.h
> >> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> >> enum virtio_vsock_type {
> >> VIRTIO_VSOCK_TYPE_STREAM = 1,
> >> VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> >> + VIRTIO_VSOCK_TYPE_DGRAM = 3,
> >> };
> >>
> >> enum virtio_vsock_op {
> >> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> >> index 1893f8aafa48..87e4ae1866d3 100644
> >> --- a/net/vmw_vsock/af_vsock.c
> >> +++ b/net/vmw_vsock/af_vsock.c
> >> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
> >> return 0;
> >> }
> >>
> >> +int vsock_bind_stream(struct vsock_sock *vsk,
> >> + struct sockaddr_vm *addr)
> >> +{
> >> + int retval;
> >> +
> >> + spin_lock_bh(&vsock_table_lock);
> >> + retval = __vsock_bind_connectible(vsk, addr);
> >> + spin_unlock_bh(&vsock_table_lock);
> >> +
> >> + return retval;
> >> +}
> >> +EXPORT_SYMBOL(vsock_bind_stream);
> >> +
> >> static int __vsock_bind_dgram(struct vsock_sock *vsk,
> >> struct sockaddr_vm *addr)
> >> {
> >> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
> >> }
> >>
> >> if (features & VSOCK_TRANSPORT_F_DGRAM) {
> >> - if (t_dgram) {
> >> - err = -EBUSY;
> >> - goto err_busy;
> >> + /* TODO: always chose the G2H variant over others, support nesting later */
> >> + if (features & VSOCK_TRANSPORT_F_G2H) {
> >> + if (t_dgram)
> >> + pr_warn("virtio_vsock: t_dgram already set\n");
> >> + t_dgram = t;
> >> + }
> >> +
> >> + if (!t_dgram) {
> >> + t_dgram = t;
> >> }
> >> - t_dgram = t;
> >> }
> >>
> >> if (features & VSOCK_TRANSPORT_F_LOCAL) {
> >> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> >> index 073314312683..d4526ca462d2 100644
> >> --- a/net/vmw_vsock/virtio_transport.c
> >> +++ b/net/vmw_vsock/virtio_transport.c
> >> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> >> return -ENOMEM;
> >>
> >> ret = vsock_core_register(&virtio_transport.transport,
> >> - VSOCK_TRANSPORT_F_G2H);
> >> + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
> >> if (ret)
> >> goto out_wq;
> >>
> >> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> >> index bdf16fff054f..aedb48728677 100644
> >> --- a/net/vmw_vsock/virtio_transport_common.c
> >> +++ b/net/vmw_vsock/virtio_transport_common.c
> >> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> >>
> >> static u16 virtio_transport_get_type(struct sock *sk)
> >> {
> >> - if (sk->sk_type == SOCK_STREAM)
> >> + if (sk->sk_type == SOCK_DGRAM)
> >> + return VIRTIO_VSOCK_TYPE_DGRAM;
> >> + else if (sk->sk_type == SOCK_STREAM)
> >> return VIRTIO_VSOCK_TYPE_STREAM;
> >> else
> >> return VIRTIO_VSOCK_TYPE_SEQPACKET;
> >> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> >> vvs = vsk->trans;
> >>
> >> /* we can send less than pkt_len bytes */
> >> - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> >> - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> >> + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> >> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> >> + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> >> + else
> >> + return 0;
> >> + }
> >>
> >> - /* virtio_transport_get_credit might return less than pkt_len credit */
> >> - pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> >> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> >> + /* virtio_transport_get_credit might return less than pkt_len credit */
> >> + pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> >>
> >> - /* Do not send zero length OP_RW pkt */
> >> - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >> - return pkt_len;
> >> + /* Do not send zero length OP_RW pkt */
> >> + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >> + return pkt_len;
> >> + }
> >>
> >> skb = virtio_transport_alloc_skb(info, pkt_len,
> >> src_cid, src_port,
> >> dst_cid, dst_port,
> >> &err);
> >> if (!skb) {
> >> - virtio_transport_put_credit(vvs, pkt_len);
> >> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> >> + virtio_transport_put_credit(vvs, pkt_len);
> >> return err;
> >> }
> >>
> >> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> >> }
> >> EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> >>
> >> +static ssize_t
> >> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> >> + struct msghdr *msg, size_t len)
> >> +{
> >> + struct virtio_vsock_sock *vvs = vsk->trans;
> >> + struct sk_buff *skb;
> >> + size_t total = 0;
> >> + u32 free_space;
> >> + int err = -EFAULT;
> >> +
> >> + spin_lock_bh(&vvs->rx_lock);
> >> + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
> >> + skb = __skb_dequeue(&vvs->rx_queue);
> >> +
> >> + total = len;
> >> + if (total > skb->len - vsock_metadata(skb)->off)
> >> + total = skb->len - vsock_metadata(skb)->off;
> >> + else if (total < skb->len - vsock_metadata(skb)->off)
> >> + msg->msg_flags |= MSG_TRUNC;
> >> +
> >> + /* sk_lock is held by caller so no one else can dequeue.
> >> + * Unlock rx_lock since memcpy_to_msg() may sleep.
> >> + */
> >> + spin_unlock_bh(&vvs->rx_lock);
> >> +
> >> + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
> >> + if (err)
> >> + return err;
> >> +
> >> + spin_lock_bh(&vvs->rx_lock);
> >> +
> >> + virtio_transport_dec_rx_pkt(vvs, skb);
> >> + consume_skb(skb);
> >> + }
> >> +
> >> + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
> >> +
> >> + spin_unlock_bh(&vvs->rx_lock);
> >> +
> >> + if (total > 0 && msg->msg_name) {
> >> + /* Provide the address of the sender. */
> >> + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
> >> +
> >> + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
> >> + le32_to_cpu(vsock_hdr(skb)->src_port));
> >> + msg->msg_namelen = sizeof(*vm_addr);
> >> + }
> >> + return total;
> >> +}
> >> +
> >> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
> >> +{
> >> + return virtio_transport_stream_has_data(vsk);
> >> +}
> >> +
> >> int
> >> virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> >> struct msghdr *msg,
> >> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
> >> struct msghdr *msg,
> >> size_t len, int flags)
> >> {
> >> - return -EOPNOTSUPP;
> >> + struct sock *sk;
> >> + size_t err = 0;
> >> + long timeout;
> >> +
> >> + DEFINE_WAIT(wait);
> >> +
> >> + sk = &vsk->sk;
> >> + err = 0;
> >> +
> >> + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
> >> + return -EOPNOTSUPP;
> >> +
> >> + lock_sock(sk);
> >> +
> >> + if (!len)
> >> + goto out;
> >> +
> >> + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> >> +
> >> + while (1) {
> >> + s64 ready;
> >> +
> >> + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
> >> + ready = virtio_transport_dgram_has_data(vsk);
> >> +
> >> + if (ready == 0) {
> >> + if (timeout == 0) {
> >> + err = -EAGAIN;
> >> + finish_wait(sk_sleep(sk), &wait);
> >> + break;
> >> + }
> >> +
> >> + release_sock(sk);
> >> + timeout = schedule_timeout(timeout);
> >> + lock_sock(sk);
> >> +
> >> + if (signal_pending(current)) {
> >> + err = sock_intr_errno(timeout);
> >> + finish_wait(sk_sleep(sk), &wait);
> >> + break;
> >> + } else if (timeout == 0) {
> >> + err = -EAGAIN;
> >> + finish_wait(sk_sleep(sk), &wait);
> >> + break;
> >> + }
> >> + } else {
> >> + finish_wait(sk_sleep(sk), &wait);
> >> +
> >> + if (ready < 0) {
> >> + err = -ENOMEM;
> >> + goto out;
> >> + }
> >> +
> >> + err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
> >> + break;
> >> + }
> >> + }
> >> +out:
> >> + release_sock(sk);
> >> + return err;
> >> }
> >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> >>
> >> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> >> int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> >> struct sockaddr_vm *addr)
> >> {
> >> - return -EOPNOTSUPP;
> >> + return vsock_bind_stream(vsk, addr);
> >> }
> >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> >>
> >> bool virtio_transport_dgram_allow(u32 cid, u32 port)
> >> {
> >> - return false;
> >> + return true;
> >> }
> >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> >>
> >> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
> >> struct msghdr *msg,
> >> size_t dgram_len)
> >> {
> >> - return -EOPNOTSUPP;
> >> + struct virtio_vsock_pkt_info info = {
> >> + .op = VIRTIO_VSOCK_OP_RW,
> >> + .msg = msg,
> >> + .pkt_len = dgram_len,
> >> + .vsk = vsk,
> >> + .remote_cid = remote_addr->svm_cid,
> >> + .remote_port = remote_addr->svm_port,
> >> + };
> >> +
> >> + return virtio_transport_send_pkt_info(vsk, &info);
> >> }
> >> EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> >>
> >> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
> >> struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> >> int err = 0;
> >>
> >> + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
> >> + virtio_transport_recv_enqueue(vsk, skb);
> >> + sk->sk_data_ready(sk);
> >> + return err;
> >> + }
> >> +
> >> switch (le16_to_cpu(hdr->op)) {
> >> case VIRTIO_VSOCK_OP_RW:
> >> virtio_transport_recv_enqueue(vsk, skb);
> >> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> >> static bool virtio_transport_valid_type(u16 type)
> >> {
> >> return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> >> - (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> >> + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> >> + (type == VIRTIO_VSOCK_TYPE_DGRAM);
> >> }
> >>
> >> /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
> >> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >> goto free_pkt;
> >> }
> >>
> >> + if (sk->sk_type == SOCK_DGRAM) {
> >> + virtio_transport_recv_connected(sk, skb);
> >> + goto out;
> >> + }
> >> +
> >> space_available = virtio_transport_space_update(sk, skb);
> >>
> >> /* Update CID in case it has changed after a transport reset event */
> >> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >> break;
> >> }
> >>
> >> +out:
> >> release_sock(sk);
> >>
> >> /* Release refcnt obtained when we fetched this socket out of the
> >> --
> >> 2.35.1
> >>
> >
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> >
>
On Tue, 2022-08-16 at 09:57 +0000, Bobby Eshleman wrote:
> On Wed, Aug 17, 2022 at 05:01:00AM +0000, Arseniy Krasnov wrote:
> > On 16.08.2022 05:32, Bobby Eshleman wrote:
> > > CC'ing virtio-dev@lists.oasis-open.org
> > >
> > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> > > > This patch supports dgram in virtio and on the vhost side.
> > Hello,
> >
> > sorry, i don't understand, how this maintains message boundaries?
> > Or it
> > is unnecessary for SOCK_DGRAM?
> >
> > Thanks
>
> If I understand your question, the length is included in the header,
> so
> receivers always know that header start + header length + payload
> length
> marks the message boundary.
I mean, consider the following case: the host sends a 5 KB packet to the
guest. The guest uses 4 KB virtio rx buffers, so in drivers/vhost/vsock.c
this 5 KB packet (i.e. its payload) will be split across two virtio rx
buffers - 4 KB in the first buffer and the remaining 1 KB in the second.
Is it implemented such that the receiver gets the whole 5 KB of data in a
single 'read()/recv()' system call?
Thanks
>
> > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> > > > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> > > > ---
> > > > drivers/vhost/vsock.c | 2 +-
> > > > include/net/af_vsock.h | 2 +
> > > > include/uapi/linux/virtio_vsock.h | 1 +
> > > > net/vmw_vsock/af_vsock.c | 26 +++-
> > > > net/vmw_vsock/virtio_transport.c | 2 +-
> > > > net/vmw_vsock/virtio_transport_common.c | 173
> > > > ++++++++++++++++++++++--
> > > > 6 files changed, 186 insertions(+), 20 deletions(-)
> > > >
> > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > > > index a5d1bdb786fe..3dc72a5647ca 100644
> > > > --- a/drivers/vhost/vsock.c
> > > > +++ b/drivers/vhost/vsock.c
> > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> > > > int ret;
> > > >
> > > > ret = vsock_core_register(&vhost_transport.transport,
> > > > - VSOCK_TRANSPORT_F_H2G);
> > > > + VSOCK_TRANSPORT_F_H2G |
> > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > if (ret < 0)
> > > > return ret;
> > > >
> > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> > > > index 1c53c4c4d88f..37e55c81e4df 100644
> > > > --- a/include/net/af_vsock.h
> > > > +++ b/include/net/af_vsock.h
> > > > @@ -78,6 +78,8 @@ struct vsock_sock {
> > > > s64 vsock_stream_has_data(struct vsock_sock *vsk);
> > > > s64 vsock_stream_has_space(struct vsock_sock *vsk);
> > > > struct sock *vsock_create_connected(struct sock *parent);
> > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > + struct sockaddr_vm *addr);
> > > >
> > > > /**** TRANSPORT ****/
> > > >
> > > > diff --git a/include/uapi/linux/virtio_vsock.h
> > > > b/include/uapi/linux/virtio_vsock.h
> > > > index 857df3a3a70d..0975b9c88292 100644
> > > > --- a/include/uapi/linux/virtio_vsock.h
> > > > +++ b/include/uapi/linux/virtio_vsock.h
> > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> > > > enum virtio_vsock_type {
> > > > VIRTIO_VSOCK_TYPE_STREAM = 1,
> > > > VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> > > > + VIRTIO_VSOCK_TYPE_DGRAM = 3,
> > > > };
> > > >
> > > > enum virtio_vsock_op {
> > > > diff --git a/net/vmw_vsock/af_vsock.c
> > > > b/net/vmw_vsock/af_vsock.c
> > > > index 1893f8aafa48..87e4ae1866d3 100644
> > > > --- a/net/vmw_vsock/af_vsock.c
> > > > +++ b/net/vmw_vsock/af_vsock.c
> > > > @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct
> > > > vsock_sock *vsk,
> > > > return 0;
> > > > }
> > > >
> > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > + struct sockaddr_vm *addr)
> > > > +{
> > > > + int retval;
> > > > +
> > > > + spin_lock_bh(&vsock_table_lock);
> > > > + retval = __vsock_bind_connectible(vsk, addr);
> > > > + spin_unlock_bh(&vsock_table_lock);
> > > > +
> > > > + return retval;
> > > > +}
> > > > +EXPORT_SYMBOL(vsock_bind_stream);
> > > > +
> > > > static int __vsock_bind_dgram(struct vsock_sock *vsk,
> > > > struct sockaddr_vm *addr)
> > > > {
> > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct
> > > > vsock_transport *t, int features)
> > > > }
> > > >
> > > > if (features & VSOCK_TRANSPORT_F_DGRAM) {
> > > > - if (t_dgram) {
> > > > - err = -EBUSY;
> > > > - goto err_busy;
> > > > + /* TODO: always chose the G2H variant over
> > > > others, support nesting later */
> > > > + if (features & VSOCK_TRANSPORT_F_G2H) {
> > > > + if (t_dgram)
> > > > + pr_warn("virtio_vsock: t_dgram
> > > > already set\n");
> > > > + t_dgram = t;
> > > > + }
> > > > +
> > > > + if (!t_dgram) {
> > > > + t_dgram = t;
> > > > }
> > > > - t_dgram = t;
> > > > }
> > > >
> > > > if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > > diff --git a/net/vmw_vsock/virtio_transport.c
> > > > b/net/vmw_vsock/virtio_transport.c
> > > > index 073314312683..d4526ca462d2 100644
> > > > --- a/net/vmw_vsock/virtio_transport.c
> > > > +++ b/net/vmw_vsock/virtio_transport.c
> > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> > > > return -ENOMEM;
> > > >
> > > > ret = vsock_core_register(&virtio_transport.transport,
> > > > - VSOCK_TRANSPORT_F_G2H);
> > > > + VSOCK_TRANSPORT_F_G2H |
> > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > if (ret)
> > > > goto out_wq;
> > > >
> > > > diff --git a/net/vmw_vsock/virtio_transport_common.c
> > > > b/net/vmw_vsock/virtio_transport_common.c
> > > > index bdf16fff054f..aedb48728677 100644
> > > > --- a/net/vmw_vsock/virtio_transport_common.c
> > > > +++ b/net/vmw_vsock/virtio_transport_common.c
> > > > @@ -229,7 +229,9 @@
> > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> > > >
> > > > static u16 virtio_transport_get_type(struct sock *sk)
> > > > {
> > > > - if (sk->sk_type == SOCK_STREAM)
> > > > + if (sk->sk_type == SOCK_DGRAM)
> > > > + return VIRTIO_VSOCK_TYPE_DGRAM;
> > > > + else if (sk->sk_type == SOCK_STREAM)
> > > > return VIRTIO_VSOCK_TYPE_STREAM;
> > > > else
> > > > return VIRTIO_VSOCK_TYPE_SEQPACKET;
> > > > @@ -287,22 +289,29 @@ static int
> > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> > > > vvs = vsk->trans;
> > > >
> > > > /* we can send less than pkt_len bytes */
> > > > - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> > > > - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > + pkt_len =
> > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > + else
> > > > + return 0;
> > > > + }
> > > >
> > > > - /* virtio_transport_get_credit might return less than
> > > > pkt_len credit */
> > > > - pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > + /* virtio_transport_get_credit might return
> > > > less than pkt_len credit */
> > > > + pkt_len = virtio_transport_get_credit(vvs,
> > > > pkt_len);
> > > >
> > > > - /* Do not send zero length OP_RW pkt */
> > > > - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> > > > - return pkt_len;
> > > > + /* Do not send zero length OP_RW pkt */
> > > > + if (pkt_len == 0 && info->op ==
> > > > VIRTIO_VSOCK_OP_RW)
> > > > + return pkt_len;
> > > > + }
> > > >
> > > > skb = virtio_transport_alloc_skb(info, pkt_len,
> > > > src_cid, src_port,
> > > > dst_cid, dst_port,
> > > > &err);
> > > > if (!skb) {
> > > > - virtio_transport_put_credit(vvs, pkt_len);
> > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > + virtio_transport_put_credit(vvs,
> > > > pkt_len);
> > > > return err;
> > > > }
> > > >
> > > > @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct
> > > > vsock_sock *vsk,
> > > > }
> > > > EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> > > >
> > > > +static ssize_t
> > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> > > > + struct msghdr *msg, size_t
> > > > len)
> > > > +{
> > > > + struct virtio_vsock_sock *vvs = vsk->trans;
> > > > + struct sk_buff *skb;
> > > > + size_t total = 0;
> > > > + u32 free_space;
> > > > + int err = -EFAULT;
> > > > +
> > > > + spin_lock_bh(&vvs->rx_lock);
> > > > + if (total < len && !skb_queue_empty_lockless(&vvs-
> > > > >rx_queue)) {
> > > > + skb = __skb_dequeue(&vvs->rx_queue);
> > > > +
> > > > + total = len;
> > > > + if (total > skb->len - vsock_metadata(skb)-
> > > > >off)
> > > > + total = skb->len - vsock_metadata(skb)-
> > > > >off;
> > > > + else if (total < skb->len -
> > > > vsock_metadata(skb)->off)
> > > > + msg->msg_flags |= MSG_TRUNC;
> > > > +
> > > > + /* sk_lock is held by caller so no one else can
> > > > dequeue.
> > > > + * Unlock rx_lock since memcpy_to_msg() may
> > > > sleep.
> > > > + */
> > > > + spin_unlock_bh(&vvs->rx_lock);
> > > > +
> > > > + err = memcpy_to_msg(msg, skb->data +
> > > > vsock_metadata(skb)->off, total);
> > > > + if (err)
> > > > + return err;
> > > > +
> > > > + spin_lock_bh(&vvs->rx_lock);
> > > > +
> > > > + virtio_transport_dec_rx_pkt(vvs, skb);
> > > > + consume_skb(skb);
> > > > + }
> > > > +
> > > > + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs-
> > > > >last_fwd_cnt);
> > > > +
> > > > + spin_unlock_bh(&vvs->rx_lock);
> > > > +
> > > > + if (total > 0 && msg->msg_name) {
> > > > + /* Provide the address of the sender. */
> > > > + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr,
> > > > msg->msg_name);
> > > > +
> > > > + vsock_addr_init(vm_addr,
> > > > le64_to_cpu(vsock_hdr(skb)->src_cid),
> > > > + le32_to_cpu(vsock_hdr(skb)-
> > > > >src_port));
> > > > + msg->msg_namelen = sizeof(*vm_addr);
> > > > + }
> > > > + return total;
> > > > +}
> > > > +
> > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock
> > > > *vsk)
> > > > +{
> > > > + return virtio_transport_stream_has_data(vsk);
> > > > +}
> > > > +
> > > > int
> > > > virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> > > > struct msghdr *msg,
> > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct
> > > > vsock_sock *vsk,
> > > > struct msghdr *msg,
> > > > size_t len, int flags)
> > > > {
> > > > - return -EOPNOTSUPP;
> > > > + struct sock *sk;
> > > > + size_t err = 0;
> > > > + long timeout;
> > > > +
> > > > + DEFINE_WAIT(wait);
> > > > +
> > > > + sk = &vsk->sk;
> > > > + err = 0;
> > > > +
> > > > + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags &
> > > > MSG_PEEK)
> > > > + return -EOPNOTSUPP;
> > > > +
> > > > + lock_sock(sk);
> > > > +
> > > > + if (!len)
> > > > + goto out;
> > > > +
> > > > + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> > > > +
> > > > + while (1) {
> > > > + s64 ready;
> > > > +
> > > > + prepare_to_wait(sk_sleep(sk), &wait,
> > > > TASK_INTERRUPTIBLE);
> > > > + ready = virtio_transport_dgram_has_data(vsk);
> > > > +
> > > > + if (ready == 0) {
> > > > + if (timeout == 0) {
> > > > + err = -EAGAIN;
> > > > + finish_wait(sk_sleep(sk),
> > > > &wait);
> > > > + break;
> > > > + }
> > > > +
> > > > + release_sock(sk);
> > > > + timeout = schedule_timeout(timeout);
> > > > + lock_sock(sk);
> > > > +
> > > > + if (signal_pending(current)) {
> > > > + err = sock_intr_errno(timeout);
> > > > + finish_wait(sk_sleep(sk),
> > > > &wait);
> > > > + break;
> > > > + } else if (timeout == 0) {
> > > > + err = -EAGAIN;
> > > > + finish_wait(sk_sleep(sk),
> > > > &wait);
> > > > + break;
> > > > + }
> > > > + } else {
> > > > + finish_wait(sk_sleep(sk), &wait);
> > > > +
> > > > + if (ready < 0) {
> > > > + err = -ENOMEM;
> > > > + goto out;
> > > > + }
> > > > +
> > > > + err =
> > > > virtio_transport_dgram_do_dequeue(vsk, msg, len);
> > > > + break;
> > > > + }
> > > > + }
> > > > +out:
> > > > + release_sock(sk);
> > > > + return err;
> > > > }
> > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> > > >
> > > > @@ -819,13 +942,13 @@
> > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> > > > int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> > > > struct sockaddr_vm *addr)
> > > > {
> > > > - return -EOPNOTSUPP;
> > > > + return vsock_bind_stream(vsk, addr);
> > > > }
> > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> > > >
> > > > bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > > > {
> > > > - return false;
> > > > + return true;
> > > > }
> > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> > > >
> > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct
> > > > vsock_sock *vsk,
> > > > struct msghdr *msg,
> > > > size_t dgram_len)
> > > > {
> > > > - return -EOPNOTSUPP;
> > > > + struct virtio_vsock_pkt_info info = {
> > > > + .op = VIRTIO_VSOCK_OP_RW,
> > > > + .msg = msg,
> > > > + .pkt_len = dgram_len,
> > > > + .vsk = vsk,
> > > > + .remote_cid = remote_addr->svm_cid,
> > > > + .remote_port = remote_addr->svm_port,
> > > > + };
> > > > +
> > > > + return virtio_transport_send_pkt_info(vsk, &info);
> > > > }
> > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> > > >
> > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct
> > > > sock *sk,
> > > > struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> > > > int err = 0;
> > > >
> > > > + if (le16_to_cpu(vsock_hdr(skb)->type) ==
> > > > VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > + virtio_transport_recv_enqueue(vsk, skb);
> > > > + sk->sk_data_ready(sk);
> > > > + return err;
> > > > + }
> > > > +
> > > > switch (le16_to_cpu(hdr->op)) {
> > > > case VIRTIO_VSOCK_OP_RW:
> > > > virtio_transport_recv_enqueue(vsk, skb);
> > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock
> > > > *sk, struct sk_buff *skb,
> > > > static bool virtio_transport_valid_type(u16 type)
> > > > {
> > > > return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> > > > - (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> > > > + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> > > > + (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > > > }
> > > >
> > > > /* We are under the virtio-vsock's vsock->rx_lock or vhost-
> > > > vsock's vq->mutex
> > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct
> > > > virtio_transport *t,
> > > > goto free_pkt;
> > > > }
> > > >
> > > > + if (sk->sk_type == SOCK_DGRAM) {
> > > > + virtio_transport_recv_connected(sk, skb);
> > > > + goto out;
> > > > + }
> > > > +
> > > > space_available = virtio_transport_space_update(sk,
> > > > skb);
> > > >
> > > > /* Update CID in case it has changed after a transport
> > > > reset event */
> > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct
> > > > virtio_transport *t,
> > > > break;
> > > > }
> > > >
> > > > +out:
> > > > release_sock(sk);
> > > >
> > > > /* Release refcnt obtained when we fetched this socket
> > > > out of the
> > > > --
> > > > 2.35.1
> > > >
> > >
> > > ---------------------------------------------------------------
> > > ------
> > > To unsubscribe, e-mail:
> > > virtio-dev-unsubscribe@lists.oasis-open.org
> > > For additional commands, e-mail:
> > > virtio-dev-help@lists.oasis-open.org
> > >
On 17.08.2022 08:01, Arseniy Krasnov wrote:
> On 16.08.2022 05:32, Bobby Eshleman wrote:
>> CC'ing virtio-dev@lists.oasis-open.org
>>
>> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
>>> This patch supports dgram in virtio and on the vhost side.
> Hello,
>
> sorry, i don't understand, how this maintains message boundaries? Or it
> is unnecessary for SOCK_DGRAM?
>
> Thanks
>>>
>>> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
>>> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
>>> ---
>>> drivers/vhost/vsock.c | 2 +-
>>> include/net/af_vsock.h | 2 +
>>> include/uapi/linux/virtio_vsock.h | 1 +
>>> net/vmw_vsock/af_vsock.c | 26 +++-
>>> net/vmw_vsock/virtio_transport.c | 2 +-
>>> net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
>>> 6 files changed, 186 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>>> index a5d1bdb786fe..3dc72a5647ca 100644
>>> --- a/drivers/vhost/vsock.c
>>> +++ b/drivers/vhost/vsock.c
>>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
>>> int ret;
>>>
>>> ret = vsock_core_register(&vhost_transport.transport,
>>> - VSOCK_TRANSPORT_F_H2G);
>>> + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
>>> if (ret < 0)
>>> return ret;
>>>
>>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>>> index 1c53c4c4d88f..37e55c81e4df 100644
>>> --- a/include/net/af_vsock.h
>>> +++ b/include/net/af_vsock.h
>>> @@ -78,6 +78,8 @@ struct vsock_sock {
>>> s64 vsock_stream_has_data(struct vsock_sock *vsk);
>>> s64 vsock_stream_has_space(struct vsock_sock *vsk);
>>> struct sock *vsock_create_connected(struct sock *parent);
>>> +int vsock_bind_stream(struct vsock_sock *vsk,
>>> + struct sockaddr_vm *addr);
>>>
>>> /**** TRANSPORT ****/
>>>
>>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
>>> index 857df3a3a70d..0975b9c88292 100644
>>> --- a/include/uapi/linux/virtio_vsock.h
>>> +++ b/include/uapi/linux/virtio_vsock.h
>>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
>>> enum virtio_vsock_type {
>>> VIRTIO_VSOCK_TYPE_STREAM = 1,
>>> VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
>>> + VIRTIO_VSOCK_TYPE_DGRAM = 3,
>>> };
>>>
>>> enum virtio_vsock_op {
>>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>>> index 1893f8aafa48..87e4ae1866d3 100644
>>> --- a/net/vmw_vsock/af_vsock.c
>>> +++ b/net/vmw_vsock/af_vsock.c
>>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
>>> return 0;
>>> }
>>>
>>> +int vsock_bind_stream(struct vsock_sock *vsk,
>>> + struct sockaddr_vm *addr)
>>> +{
>>> + int retval;
>>> +
>>> + spin_lock_bh(&vsock_table_lock);
>>> + retval = __vsock_bind_connectible(vsk, addr);
>>> + spin_unlock_bh(&vsock_table_lock);
>>> +
>>> + return retval;
>>> +}
>>> +EXPORT_SYMBOL(vsock_bind_stream);
>>> +
>>> static int __vsock_bind_dgram(struct vsock_sock *vsk,
>>> struct sockaddr_vm *addr)
>>> {
>>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
>>> }
>>>
>>> if (features & VSOCK_TRANSPORT_F_DGRAM) {
>>> - if (t_dgram) {
>>> - err = -EBUSY;
>>> - goto err_busy;
>>> + /* TODO: always chose the G2H variant over others, support nesting later */
>>> + if (features & VSOCK_TRANSPORT_F_G2H) {
>>> + if (t_dgram)
>>> + pr_warn("virtio_vsock: t_dgram already set\n");
>>> + t_dgram = t;
>>> + }
>>> +
>>> + if (!t_dgram) {
>>> + t_dgram = t;
>>> }
>>> - t_dgram = t;
>>> }
>>>
>>> if (features & VSOCK_TRANSPORT_F_LOCAL) {
>>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>>> index 073314312683..d4526ca462d2 100644
>>> --- a/net/vmw_vsock/virtio_transport.c
>>> +++ b/net/vmw_vsock/virtio_transport.c
>>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
>>> return -ENOMEM;
>>>
>>> ret = vsock_core_register(&virtio_transport.transport,
>>> - VSOCK_TRANSPORT_F_G2H);
>>> + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
>>> if (ret)
>>> goto out_wq;
>>>
>>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>>> index bdf16fff054f..aedb48728677 100644
>>> --- a/net/vmw_vsock/virtio_transport_common.c
>>> +++ b/net/vmw_vsock/virtio_transport_common.c
>>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>>>
>>> static u16 virtio_transport_get_type(struct sock *sk)
>>> {
>>> - if (sk->sk_type == SOCK_STREAM)
>>> + if (sk->sk_type == SOCK_DGRAM)
>>> + return VIRTIO_VSOCK_TYPE_DGRAM;
>>> + else if (sk->sk_type == SOCK_STREAM)
>>> return VIRTIO_VSOCK_TYPE_STREAM;
>>> else
>>> return VIRTIO_VSOCK_TYPE_SEQPACKET;
>>> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
>>> vvs = vsk->trans;
>>>
>>> /* we can send less than pkt_len bytes */
>>> - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
>>> - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>>> + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
>>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>>> + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>>> + else
>>> + return 0;
>>> + }
>>>
>>> - /* virtio_transport_get_credit might return less than pkt_len credit */
>>> - pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
>>> + /* virtio_transport_get_credit might return less than pkt_len credit */
>>> + pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>>>
>>> - /* Do not send zero length OP_RW pkt */
>>> - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>> - return pkt_len;
>>> + /* Do not send zero length OP_RW pkt */
>>> + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>> + return pkt_len;
>>> + }
>>>
>>> skb = virtio_transport_alloc_skb(info, pkt_len,
>>> src_cid, src_port,
>>> dst_cid, dst_port,
>>> &err);
>>> if (!skb) {
>>> - virtio_transport_put_credit(vvs, pkt_len);
>>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>>> + virtio_transport_put_credit(vvs, pkt_len);
>>> return err;
>>> }
>>>
>>> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
>>> }
>>> EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
>>>
>>> +static ssize_t
>>> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
>>> + struct msghdr *msg, size_t len)
>>> +{
>>> + struct virtio_vsock_sock *vvs = vsk->trans;
>>> + struct sk_buff *skb;
>>> + size_t total = 0;
>>> + u32 free_space;
>>> + int err = -EFAULT;
>>> +
>>> + spin_lock_bh(&vvs->rx_lock);
>>> + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
>>> + skb = __skb_dequeue(&vvs->rx_queue);
>>> +
>>> + total = len;
>>> + if (total > skb->len - vsock_metadata(skb)->off)
>>> + total = skb->len - vsock_metadata(skb)->off;
>>> + else if (total < skb->len - vsock_metadata(skb)->off)
>>> + msg->msg_flags |= MSG_TRUNC;
>>> +
>>> + /* sk_lock is held by caller so no one else can dequeue.
>>> + * Unlock rx_lock since memcpy_to_msg() may sleep.
>>> + */
>>> + spin_unlock_bh(&vvs->rx_lock);
>>> +
>>> + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
>>> + if (err)
>>> + return err;
>>> +
>>> + spin_lock_bh(&vvs->rx_lock);
>>> +
>>> + virtio_transport_dec_rx_pkt(vvs, skb);
>>> + consume_skb(skb);
>>> + }
>>> +
>>> + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
>>> +
>>> + spin_unlock_bh(&vvs->rx_lock);
>>> +
>>> + if (total > 0 && msg->msg_name) {
>>> + /* Provide the address of the sender. */
>>> + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
>>> +
>>> + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
>>> + le32_to_cpu(vsock_hdr(skb)->src_port));
>>> + msg->msg_namelen = sizeof(*vm_addr);
>>> + }
>>> + return total;
>>> +}
>>> +
>>> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
>>> +{
>>> + return virtio_transport_stream_has_data(vsk);
>>> +}
>>> +
>>> int
>>> virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
>>> struct msghdr *msg,
>>> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
>>> struct msghdr *msg,
>>> size_t len, int flags)
>>> {
>>> - return -EOPNOTSUPP;
>>> + struct sock *sk;
>>> + size_t err = 0;
>>> + long timeout;
>>> +
>>> + DEFINE_WAIT(wait);
>>> +
>>> + sk = &vsk->sk;
>>> + err = 0;
>>> +
>>> + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
>>> + return -EOPNOTSUPP;
>>> +
>>> + lock_sock(sk);
>>> +
>>> + if (!len)
>>> + goto out;
>>> +
>>> + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>>> +
>>> + while (1) {
>>> + s64 ready;
>>> +
>>> + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
>>> + ready = virtio_transport_dgram_has_data(vsk);
>>> +
>>> + if (ready == 0) {
>>> + if (timeout == 0) {
>>> + err = -EAGAIN;
>>> + finish_wait(sk_sleep(sk), &wait);
>>> + break;
>>> + }
>>> +
>>> + release_sock(sk);
>>> + timeout = schedule_timeout(timeout);
>>> + lock_sock(sk);
>>> +
>>> + if (signal_pending(current)) {
>>> + err = sock_intr_errno(timeout);
>>> + finish_wait(sk_sleep(sk), &wait);
>>> + break;
>>> + } else if (timeout == 0) {
>>> + err = -EAGAIN;
>>> + finish_wait(sk_sleep(sk), &wait);
>>> + break;
>>> + }
>>> + } else {
>>> + finish_wait(sk_sleep(sk), &wait);
>>> +
>>> + if (ready < 0) {
>>> + err = -ENOMEM;
>>> + goto out;
>>> + }
>>> +
>>> + err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
>>> + break;
>>> + }
>>> + }
>>> +out:
>>> + release_sock(sk);
>>> + return err;
>>> }
>>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
^^^
Maybe this generic data-waiting logic should live in af_vsock.c, as it does for stream/seqpacket?
That way, another transport that supports SOCK_DGRAM could reuse it.
>>>
>>> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
>>> int virtio_transport_dgram_bind(struct vsock_sock *vsk,
>>> struct sockaddr_vm *addr)
>>> {
>>> - return -EOPNOTSUPP;
>>> + return vsock_bind_stream(vsk, addr);
>>> }
>>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
>>>
>>> bool virtio_transport_dgram_allow(u32 cid, u32 port)
>>> {
>>> - return false;
>>> + return true;
>>> }
>>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
>>>
>>> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
>>> struct msghdr *msg,
>>> size_t dgram_len)
>>> {
>>> - return -EOPNOTSUPP;
>>> + struct virtio_vsock_pkt_info info = {
>>> + .op = VIRTIO_VSOCK_OP_RW,
>>> + .msg = msg,
>>> + .pkt_len = dgram_len,
>>> + .vsk = vsk,
>>> + .remote_cid = remote_addr->svm_cid,
>>> + .remote_port = remote_addr->svm_port,
>>> + };
>>> +
>>> + return virtio_transport_send_pkt_info(vsk, &info);
>>> }
>>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
>>>
>>> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
>>> struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
>>> int err = 0;
>>>
>>> + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
>>> + virtio_transport_recv_enqueue(vsk, skb);
>>> + sk->sk_data_ready(sk);
>>> + return err;
>>> + }
>>> +
>>> switch (le16_to_cpu(hdr->op)) {
>>> case VIRTIO_VSOCK_OP_RW:
>>> virtio_transport_recv_enqueue(vsk, skb);
>>> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
>>> static bool virtio_transport_valid_type(u16 type)
>>> {
>>> return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
>>> - (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
>>> + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
>>> + (type == VIRTIO_VSOCK_TYPE_DGRAM);
>>> }
>>>
>>> /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
>>> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>>> goto free_pkt;
>>> }
>>>
>>> + if (sk->sk_type == SOCK_DGRAM) {
>>> + virtio_transport_recv_connected(sk, skb);
>>> + goto out;
>>> + }
>>> +
>>> space_available = virtio_transport_space_update(sk, skb);
>>>
>>> /* Update CID in case it has changed after a transport reset event */
>>> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
>>> break;
>>> }
>>>
>>> +out:
>>> release_sock(sk);
>>>
>>> /* Release refcnt obtained when we fetched this socket out of the
>>> --
>>> 2.35.1
>>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
>> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>>
>
On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote:
> On 17.08.2022 08:01, Arseniy Krasnov wrote:
> > On 16.08.2022 05:32, Bobby Eshleman wrote:
> >> CC'ing virtio-dev@lists.oasis-open.org
> >>
> >> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> >>> This patch supports dgram in virtio and on the vhost side.
> > Hello,
> >
> > Sorry, I don't understand how this maintains message boundaries. Or is
> > that unnecessary for SOCK_DGRAM?
> >
> > Thanks
> >>>
> >>> Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> >>> Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> >>> ---
> >>> drivers/vhost/vsock.c | 2 +-
> >>> include/net/af_vsock.h | 2 +
> >>> include/uapi/linux/virtio_vsock.h | 1 +
> >>> net/vmw_vsock/af_vsock.c | 26 +++-
> >>> net/vmw_vsock/virtio_transport.c | 2 +-
> >>> net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
> >>> 6 files changed, 186 insertions(+), 20 deletions(-)
> >>>
> >>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> >>> index a5d1bdb786fe..3dc72a5647ca 100644
> >>> --- a/drivers/vhost/vsock.c
> >>> +++ b/drivers/vhost/vsock.c
> >>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> >>> int ret;
> >>>
> >>> ret = vsock_core_register(&vhost_transport.transport,
> >>> - VSOCK_TRANSPORT_F_H2G);
> >>> + VSOCK_TRANSPORT_F_H2G | VSOCK_TRANSPORT_F_DGRAM);
> >>> if (ret < 0)
> >>> return ret;
> >>>
> >>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> >>> index 1c53c4c4d88f..37e55c81e4df 100644
> >>> --- a/include/net/af_vsock.h
> >>> +++ b/include/net/af_vsock.h
> >>> @@ -78,6 +78,8 @@ struct vsock_sock {
> >>> s64 vsock_stream_has_data(struct vsock_sock *vsk);
> >>> s64 vsock_stream_has_space(struct vsock_sock *vsk);
> >>> struct sock *vsock_create_connected(struct sock *parent);
> >>> +int vsock_bind_stream(struct vsock_sock *vsk,
> >>> + struct sockaddr_vm *addr);
> >>>
> >>> /**** TRANSPORT ****/
> >>>
> >>> diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
> >>> index 857df3a3a70d..0975b9c88292 100644
> >>> --- a/include/uapi/linux/virtio_vsock.h
> >>> +++ b/include/uapi/linux/virtio_vsock.h
> >>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> >>> enum virtio_vsock_type {
> >>> VIRTIO_VSOCK_TYPE_STREAM = 1,
> >>> VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> >>> + VIRTIO_VSOCK_TYPE_DGRAM = 3,
> >>> };
> >>>
> >>> enum virtio_vsock_op {
> >>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> >>> index 1893f8aafa48..87e4ae1866d3 100644
> >>> --- a/net/vmw_vsock/af_vsock.c
> >>> +++ b/net/vmw_vsock/af_vsock.c
> >>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
> >>> return 0;
> >>> }
> >>>
> >>> +int vsock_bind_stream(struct vsock_sock *vsk,
> >>> + struct sockaddr_vm *addr)
> >>> +{
> >>> + int retval;
> >>> +
> >>> + spin_lock_bh(&vsock_table_lock);
> >>> + retval = __vsock_bind_connectible(vsk, addr);
> >>> + spin_unlock_bh(&vsock_table_lock);
> >>> +
> >>> + return retval;
> >>> +}
> >>> +EXPORT_SYMBOL(vsock_bind_stream);
> >>> +
> >>> static int __vsock_bind_dgram(struct vsock_sock *vsk,
> >>> struct sockaddr_vm *addr)
> >>> {
> >>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport *t, int features)
> >>> }
> >>>
> >>> if (features & VSOCK_TRANSPORT_F_DGRAM) {
> >>> - if (t_dgram) {
> >>> - err = -EBUSY;
> >>> - goto err_busy;
> >>> + /* TODO: always chose the G2H variant over others, support nesting later */
> >>> + if (features & VSOCK_TRANSPORT_F_G2H) {
> >>> + if (t_dgram)
> >>> + pr_warn("virtio_vsock: t_dgram already set\n");
> >>> + t_dgram = t;
> >>> + }
> >>> +
> >>> + if (!t_dgram) {
> >>> + t_dgram = t;
> >>> }
> >>> - t_dgram = t;
> >>> }
> >>>
> >>> if (features & VSOCK_TRANSPORT_F_LOCAL) {
> >>> diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> >>> index 073314312683..d4526ca462d2 100644
> >>> --- a/net/vmw_vsock/virtio_transport.c
> >>> +++ b/net/vmw_vsock/virtio_transport.c
> >>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> >>> return -ENOMEM;
> >>>
> >>> ret = vsock_core_register(&virtio_transport.transport,
> >>> - VSOCK_TRANSPORT_F_G2H);
> >>> + VSOCK_TRANSPORT_F_G2H | VSOCK_TRANSPORT_F_DGRAM);
> >>> if (ret)
> >>> goto out_wq;
> >>>
> >>> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> >>> index bdf16fff054f..aedb48728677 100644
> >>> --- a/net/vmw_vsock/virtio_transport_common.c
> >>> +++ b/net/vmw_vsock/virtio_transport_common.c
> >>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> >>>
> >>> static u16 virtio_transport_get_type(struct sock *sk)
> >>> {
> >>> - if (sk->sk_type == SOCK_STREAM)
> >>> + if (sk->sk_type == SOCK_DGRAM)
> >>> + return VIRTIO_VSOCK_TYPE_DGRAM;
> >>> + else if (sk->sk_type == SOCK_STREAM)
> >>> return VIRTIO_VSOCK_TYPE_STREAM;
> >>> else
> >>> return VIRTIO_VSOCK_TYPE_SEQPACKET;
> >>> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> >>> vvs = vsk->trans;
> >>>
> >>> /* we can send less than pkt_len bytes */
> >>> - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> >>> - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> >>> + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> >>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> >>> + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> >>> + else
> >>> + return 0;
> >>> + }
> >>>
> >>> - /* virtio_transport_get_credit might return less than pkt_len credit */
> >>> - pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> >>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> >>> + /* virtio_transport_get_credit might return less than pkt_len credit */
> >>> + pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> >>>
> >>> - /* Do not send zero length OP_RW pkt */
> >>> - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >>> - return pkt_len;
> >>> + /* Do not send zero length OP_RW pkt */
> >>> + if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> >>> + return pkt_len;
> >>> + }
> >>>
> >>> skb = virtio_transport_alloc_skb(info, pkt_len,
> >>> src_cid, src_port,
> >>> dst_cid, dst_port,
> >>> &err);
> >>> if (!skb) {
> >>> - virtio_transport_put_credit(vvs, pkt_len);
> >>> + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> >>> + virtio_transport_put_credit(vvs, pkt_len);
> >>> return err;
> >>> }
> >>>
> >>> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> >>> }
> >>> EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> >>>
> >>> +static ssize_t
> >>> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> >>> + struct msghdr *msg, size_t len)
> >>> +{
> >>> + struct virtio_vsock_sock *vvs = vsk->trans;
> >>> + struct sk_buff *skb;
> >>> + size_t total = 0;
> >>> + u32 free_space;
> >>> + int err = -EFAULT;
> >>> +
> >>> + spin_lock_bh(&vvs->rx_lock);
> >>> + if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
> >>> + skb = __skb_dequeue(&vvs->rx_queue);
> >>> +
> >>> + total = len;
> >>> + if (total > skb->len - vsock_metadata(skb)->off)
> >>> + total = skb->len - vsock_metadata(skb)->off;
> >>> + else if (total < skb->len - vsock_metadata(skb)->off)
> >>> + msg->msg_flags |= MSG_TRUNC;
> >>> +
> >>> + /* sk_lock is held by caller so no one else can dequeue.
> >>> + * Unlock rx_lock since memcpy_to_msg() may sleep.
> >>> + */
> >>> + spin_unlock_bh(&vvs->rx_lock);
> >>> +
> >>> + err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
> >>> + if (err)
> >>> + return err;
> >>> +
> >>> + spin_lock_bh(&vvs->rx_lock);
> >>> +
> >>> + virtio_transport_dec_rx_pkt(vvs, skb);
> >>> + consume_skb(skb);
> >>> + }
> >>> +
> >>> + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
> >>> +
> >>> + spin_unlock_bh(&vvs->rx_lock);
> >>> +
> >>> + if (total > 0 && msg->msg_name) {
> >>> + /* Provide the address of the sender. */
> >>> + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
> >>> +
> >>> + vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
> >>> + le32_to_cpu(vsock_hdr(skb)->src_port));
> >>> + msg->msg_namelen = sizeof(*vm_addr);
> >>> + }
> >>> + return total;
> >>> +}
> >>> +
> >>> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
> >>> +{
> >>> + return virtio_transport_stream_has_data(vsk);
> >>> +}
> >>> +
> >>> int
> >>> virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> >>> struct msghdr *msg,
> >>> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
> >>> struct msghdr *msg,
> >>> size_t len, int flags)
> >>> {
> >>> - return -EOPNOTSUPP;
> >>> + struct sock *sk;
> >>> + size_t err = 0;
> >>> + long timeout;
> >>> +
> >>> + DEFINE_WAIT(wait);
> >>> +
> >>> + sk = &vsk->sk;
> >>> + err = 0;
> >>> +
> >>> + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
> >>> + return -EOPNOTSUPP;
> >>> +
> >>> + lock_sock(sk);
> >>> +
> >>> + if (!len)
> >>> + goto out;
> >>> +
> >>> + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> >>> +
> >>> + while (1) {
> >>> + s64 ready;
> >>> +
> >>> + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
> >>> + ready = virtio_transport_dgram_has_data(vsk);
> >>> +
> >>> + if (ready == 0) {
> >>> + if (timeout == 0) {
> >>> + err = -EAGAIN;
> >>> + finish_wait(sk_sleep(sk), &wait);
> >>> + break;
> >>> + }
> >>> +
> >>> + release_sock(sk);
> >>> + timeout = schedule_timeout(timeout);
> >>> + lock_sock(sk);
> >>> +
> >>> + if (signal_pending(current)) {
> >>> + err = sock_intr_errno(timeout);
> >>> + finish_wait(sk_sleep(sk), &wait);
> >>> + break;
> >>> + } else if (timeout == 0) {
> >>> + err = -EAGAIN;
> >>> + finish_wait(sk_sleep(sk), &wait);
> >>> + break;
> >>> + }
> >>> + } else {
> >>> + finish_wait(sk_sleep(sk), &wait);
> >>> +
> >>> + if (ready < 0) {
> >>> + err = -ENOMEM;
> >>> + goto out;
> >>> + }
> >>> +
> >>> + err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
> >>> + break;
> >>> + }
> >>> + }
> >>> +out:
> >>> + release_sock(sk);
> >>> + return err;
> >>> }
> >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> ^^^
> Maybe this generic data-waiting logic should live in af_vsock.c, as it does for stream/seqpacket?
> That way, another transport that supports SOCK_DGRAM could reuse it.
I think that is a great idea. I'll test that change for v2.
Thanks.
> >>>
> >>> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> >>> int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> >>> struct sockaddr_vm *addr)
> >>> {
> >>> - return -EOPNOTSUPP;
> >>> + return vsock_bind_stream(vsk, addr);
> >>> }
> >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> >>>
> >>> bool virtio_transport_dgram_allow(u32 cid, u32 port)
> >>> {
> >>> - return false;
> >>> + return true;
> >>> }
> >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> >>>
> >>> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
> >>> struct msghdr *msg,
> >>> size_t dgram_len)
> >>> {
> >>> - return -EOPNOTSUPP;
> >>> + struct virtio_vsock_pkt_info info = {
> >>> + .op = VIRTIO_VSOCK_OP_RW,
> >>> + .msg = msg,
> >>> + .pkt_len = dgram_len,
> >>> + .vsk = vsk,
> >>> + .remote_cid = remote_addr->svm_cid,
> >>> + .remote_port = remote_addr->svm_port,
> >>> + };
> >>> +
> >>> + return virtio_transport_send_pkt_info(vsk, &info);
> >>> }
> >>> EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> >>>
> >>> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
> >>> struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> >>> int err = 0;
> >>>
> >>> + if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
> >>> + virtio_transport_recv_enqueue(vsk, skb);
> >>> + sk->sk_data_ready(sk);
> >>> + return err;
> >>> + }
> >>> +
> >>> switch (le16_to_cpu(hdr->op)) {
> >>> case VIRTIO_VSOCK_OP_RW:
> >>> virtio_transport_recv_enqueue(vsk, skb);
> >>> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
> >>> static bool virtio_transport_valid_type(u16 type)
> >>> {
> >>> return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> >>> - (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> >>> + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> >>> + (type == VIRTIO_VSOCK_TYPE_DGRAM);
> >>> }
> >>>
> >>> /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
> >>> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >>> goto free_pkt;
> >>> }
> >>>
> >>> + if (sk->sk_type == SOCK_DGRAM) {
> >>> + virtio_transport_recv_connected(sk, skb);
> >>> + goto out;
> >>> + }
> >>> +
> >>> space_available = virtio_transport_space_update(sk, skb);
> >>>
> >>> /* Update CID in case it has changed after a transport reset event */
> >>> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> >>> break;
> >>> }
> >>>
> >>> +out:
> >>> release_sock(sk);
> >>>
> >>> /* Release refcnt obtained when we fetched this socket out of the
> >>> --
> >>> 2.35.1
> >>>
> >>
> >> ---------------------------------------------------------------------
> >> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> >> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> >>
> >
>
On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote:
> On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote:
> > On 17.08.2022 08:01, Arseniy Krasnov wrote:
> > > On 16.08.2022 05:32, Bobby Eshleman wrote:
> > > > CC'ing virtio-dev@lists.oasis-open.org
> > > >
> > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> > > > > This patch supports dgram in virtio and on the vhost side.
> > > Hello,
> > >
> > > > Sorry, I don't understand how this maintains message boundaries.
> > > > Or is that unnecessary for SOCK_DGRAM?
> > >
> > > Thanks
> > > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> > > > > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> > > > > ---
> > > > > drivers/vhost/vsock.c | 2 +-
> > > > > include/net/af_vsock.h | 2 +
> > > > > include/uapi/linux/virtio_vsock.h | 1 +
> > > > > net/vmw_vsock/af_vsock.c | 26 +++-
> > > > > net/vmw_vsock/virtio_transport.c | 2 +-
> > > > > net/vmw_vsock/virtio_transport_common.c | 173
> > > > > ++++++++++++++++++++++--
> > > > > 6 files changed, 186 insertions(+), 20 deletions(-)
> > > > >
> > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > > > > index a5d1bdb786fe..3dc72a5647ca 100644
> > > > > --- a/drivers/vhost/vsock.c
> > > > > +++ b/drivers/vhost/vsock.c
> > > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> > > > > int ret;
> > > > >
> > > > > ret = vsock_core_register(&vhost_transport.transport,
> > > > > - VSOCK_TRANSPORT_F_H2G);
> > > > > + VSOCK_TRANSPORT_F_H2G |
> > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > > if (ret < 0)
> > > > > return ret;
> > > > >
> > > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> > > > > index 1c53c4c4d88f..37e55c81e4df 100644
> > > > > --- a/include/net/af_vsock.h
> > > > > +++ b/include/net/af_vsock.h
> > > > > @@ -78,6 +78,8 @@ struct vsock_sock {
> > > > > s64 vsock_stream_has_data(struct vsock_sock *vsk);
> > > > > s64 vsock_stream_has_space(struct vsock_sock *vsk);
> > > > > struct sock *vsock_create_connected(struct sock *parent);
> > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > + struct sockaddr_vm *addr);
> > > > >
> > > > > /**** TRANSPORT ****/
> > > > >
> > > > > diff --git a/include/uapi/linux/virtio_vsock.h
> > > > > b/include/uapi/linux/virtio_vsock.h
> > > > > index 857df3a3a70d..0975b9c88292 100644
> > > > > --- a/include/uapi/linux/virtio_vsock.h
> > > > > +++ b/include/uapi/linux/virtio_vsock.h
> > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> > > > > enum virtio_vsock_type {
> > > > > VIRTIO_VSOCK_TYPE_STREAM = 1,
> > > > > VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> > > > > + VIRTIO_VSOCK_TYPE_DGRAM = 3,
> > > > > };
> > > > >
> > > > > enum virtio_vsock_op {
> > > > > diff --git a/net/vmw_vsock/af_vsock.c
> > > > > b/net/vmw_vsock/af_vsock.c
> > > > > index 1893f8aafa48..87e4ae1866d3 100644
> > > > > --- a/net/vmw_vsock/af_vsock.c
> > > > > +++ b/net/vmw_vsock/af_vsock.c
> > > > > @@ -675,6 +675,19 @@ static int
> > > > > __vsock_bind_connectible(struct vsock_sock *vsk,
> > > > > return 0;
> > > > > }
> > > > >
> > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > + struct sockaddr_vm *addr)
> > > > > +{
> > > > > + int retval;
> > > > > +
> > > > > + spin_lock_bh(&vsock_table_lock);
> > > > > + retval = __vsock_bind_connectible(vsk, addr);
> > > > > + spin_unlock_bh(&vsock_table_lock);
> > > > > +
> > > > > + return retval;
> > > > > +}
> > > > > +EXPORT_SYMBOL(vsock_bind_stream);
> > > > > +
> > > > > static int __vsock_bind_dgram(struct vsock_sock *vsk,
> > > > > struct sockaddr_vm *addr)
> > > > > {
> > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct
> > > > > vsock_transport *t, int features)
> > > > > }
> > > > >
> > > > > if (features & VSOCK_TRANSPORT_F_DGRAM) {
> > > > > - if (t_dgram) {
> > > > > - err = -EBUSY;
> > > > > - goto err_busy;
> > > > > + /* TODO: always chose the G2H variant over
> > > > > others, support nesting later */
> > > > > + if (features & VSOCK_TRANSPORT_F_G2H) {
> > > > > + if (t_dgram)
> > > > > + pr_warn("virtio_vsock: t_dgram
> > > > > already set\n");
> > > > > + t_dgram = t;
> > > > > + }
> > > > > +
> > > > > + if (!t_dgram) {
> > > > > + t_dgram = t;
> > > > > }
> > > > > - t_dgram = t;
> > > > > }
> > > > >
> > > > > if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > > > diff --git a/net/vmw_vsock/virtio_transport.c
> > > > > b/net/vmw_vsock/virtio_transport.c
> > > > > index 073314312683..d4526ca462d2 100644
> > > > > --- a/net/vmw_vsock/virtio_transport.c
> > > > > +++ b/net/vmw_vsock/virtio_transport.c
> > > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> > > > > return -ENOMEM;
> > > > >
> > > > > ret = vsock_core_register(&virtio_transport.transport,
> > > > > - VSOCK_TRANSPORT_F_G2H);
> > > > > + VSOCK_TRANSPORT_F_G2H |
> > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > > if (ret)
> > > > > goto out_wq;
> > > > >
> > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c
> > > > > b/net/vmw_vsock/virtio_transport_common.c
> > > > > index bdf16fff054f..aedb48728677 100644
> > > > > --- a/net/vmw_vsock/virtio_transport_common.c
> > > > > +++ b/net/vmw_vsock/virtio_transport_common.c
> > > > > @@ -229,7 +229,9 @@
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> > > > >
> > > > > static u16 virtio_transport_get_type(struct sock *sk)
> > > > > {
> > > > > - if (sk->sk_type == SOCK_STREAM)
> > > > > + if (sk->sk_type == SOCK_DGRAM)
> > > > > + return VIRTIO_VSOCK_TYPE_DGRAM;
> > > > > + else if (sk->sk_type == SOCK_STREAM)
> > > > > return VIRTIO_VSOCK_TYPE_STREAM;
> > > > > else
> > > > > return VIRTIO_VSOCK_TYPE_SEQPACKET;
> > > > > @@ -287,22 +289,29 @@ static int
> > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> > > > > vvs = vsk->trans;
> > > > >
> > > > > /* we can send less than pkt_len bytes */
> > > > > - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> > > > > - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > + pkt_len =
> > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > + else
> > > > > + return 0;
> > > > > + }
> > > > >
> > > > > - /* virtio_transport_get_credit might return less than
> > > > > pkt_len credit */
> > > > > - pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > + /* virtio_transport_get_credit might return
> > > > > less than pkt_len credit */
> > > > > + pkt_len = virtio_transport_get_credit(vvs,
> > > > > pkt_len);
> > > > >
> > > > > - /* Do not send zero length OP_RW pkt */
> > > > > - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> > > > > - return pkt_len;
> > > > > + /* Do not send zero length OP_RW pkt */
> > > > > + if (pkt_len == 0 && info->op ==
> > > > > VIRTIO_VSOCK_OP_RW)
> > > > > + return pkt_len;
> > > > > + }
> > > > >
> > > > > skb = virtio_transport_alloc_skb(info, pkt_len,
> > > > > src_cid, src_port,
> > > > > dst_cid, dst_port,
> > > > > &err);
> > > > > if (!skb) {
> > > > > - virtio_transport_put_credit(vvs, pkt_len);
> > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > + virtio_transport_put_credit(vvs,
> > > > > pkt_len);
> > > > > return err;
> > > > > }
> > > > >
> > > > > @@ -586,6 +595,61 @@
> > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> > > > > }
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> > > > >
> > > > > +static ssize_t
> > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> > > > > + struct msghdr *msg, size_t
> > > > > len)
> > > > > +{
> > > > > + struct virtio_vsock_sock *vvs = vsk->trans;
> > > > > + struct sk_buff *skb;
> > > > > + size_t total = 0;
> > > > > + u32 free_space;
> > > > > + int err = -EFAULT;
> > > > > +
> > > > > + spin_lock_bh(&vvs->rx_lock);
> > > > > + if (total < len && !skb_queue_empty_lockless(&vvs-
> > > > > >rx_queue)) {
> > > > > + skb = __skb_dequeue(&vvs->rx_queue);
> > > > > +
> > > > > + total = len;
> > > > > + if (total > skb->len - vsock_metadata(skb)-
> > > > > >off)
> > > > > + total = skb->len - vsock_metadata(skb)-
> > > > > >off;
> > > > > + else if (total < skb->len -
> > > > > vsock_metadata(skb)->off)
> > > > > + msg->msg_flags |= MSG_TRUNC;
> > > > > +
> > > > > + /* sk_lock is held by caller so no one else can
> > > > > dequeue.
> > > > > + * Unlock rx_lock since memcpy_to_msg() may
> > > > > sleep.
> > > > > + */
> > > > > + spin_unlock_bh(&vvs->rx_lock);
> > > > > +
> > > > > + err = memcpy_to_msg(msg, skb->data +
> > > > > vsock_metadata(skb)->off, total);
> > > > > + if (err)
> > > > > + return err;
> > > > > +
> > > > > + spin_lock_bh(&vvs->rx_lock);
> > > > > +
> > > > > + virtio_transport_dec_rx_pkt(vvs, skb);
> > > > > + consume_skb(skb);
> > > > > + }
> > > > > +
> > > > > + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs-
> > > > > >last_fwd_cnt);
> > > > > +
> > > > > + spin_unlock_bh(&vvs->rx_lock);
> > > > > +
> > > > > + if (total > 0 && msg->msg_name) {
> > > > > + /* Provide the address of the sender. */
> > > > > + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr,
> > > > > msg->msg_name);
> > > > > +
> > > > > + vsock_addr_init(vm_addr,
> > > > > le64_to_cpu(vsock_hdr(skb)->src_cid),
> > > > > + le32_to_cpu(vsock_hdr(skb)-
> > > > > >src_port));
> > > > > + msg->msg_namelen = sizeof(*vm_addr);
> > > > > + }
> > > > > + return total;
> > > > > +}
> > > > > +
> > > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock
> > > > > *vsk)
> > > > > +{
> > > > > + return virtio_transport_stream_has_data(vsk);
> > > > > +}
> > > > > +
> > > > > int
> > > > > virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> > > > > struct msghdr *msg,
> > > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct
> > > > > vsock_sock *vsk,
> > > > > struct msghdr *msg,
> > > > > size_t len, int flags)
> > > > > {
> > > > > - return -EOPNOTSUPP;
> > > > > + struct sock *sk;
> > > > > + size_t err = 0;
> > > > > + long timeout;
> > > > > +
> > > > > + DEFINE_WAIT(wait);
> > > > > +
> > > > > + sk = &vsk->sk;
> > > > > + err = 0;
> > > > > +
> > > > > + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags &
> > > > > MSG_PEEK)
> > > > > + return -EOPNOTSUPP;
> > > > > +
> > > > > + lock_sock(sk);
> > > > > +
> > > > > + if (!len)
> > > > > + goto out;
> > > > > +
> > > > > + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> > > > > +
> > > > > + while (1) {
> > > > > + s64 ready;
> > > > > +
> > > > > + prepare_to_wait(sk_sleep(sk), &wait,
> > > > > TASK_INTERRUPTIBLE);
> > > > > + ready = virtio_transport_dgram_has_data(vsk);
> > > > > +
> > > > > + if (ready == 0) {
> > > > > + if (timeout == 0) {
> > > > > + err = -EAGAIN;
> > > > > + finish_wait(sk_sleep(sk),
> > > > > &wait);
> > > > > + break;
> > > > > + }
> > > > > +
> > > > > + release_sock(sk);
> > > > > + timeout = schedule_timeout(timeout);
> > > > > + lock_sock(sk);
> > > > > +
> > > > > + if (signal_pending(current)) {
> > > > > + err = sock_intr_errno(timeout);
> > > > > + finish_wait(sk_sleep(sk),
> > > > > &wait);
> > > > > + break;
> > > > > + } else if (timeout == 0) {
> > > > > + err = -EAGAIN;
> > > > > + finish_wait(sk_sleep(sk),
> > > > > &wait);
> > > > > + break;
> > > > > + }
> > > > > + } else {
> > > > > + finish_wait(sk_sleep(sk), &wait);
> > > > > +
> > > > > + if (ready < 0) {
> > > > > + err = -ENOMEM;
> > > > > + goto out;
> > > > > + }
> > > > > +
> > > > > + err =
> > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len);
> > > > > + break;
> > > > > + }
> > > > > + }
> > > > > +out:
> > > > > + release_sock(sk);
> > > > > + return err;
> > > > > }
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> > ^^^
> > Maybe this generic data-waiting logic should live in af_vsock.c, as
> > it does for stream/seqpacket?
> > That way, another transport that supports SOCK_DGRAM could
> > reuse it.
>
> I think that is a great idea. I'll test that change for v2.
>
> Thanks.
Also for v2, I tested your patchset a little (writing here so as not to
spread this over all the mails):
1) The seqpacket test in vsock_test.c fails (seems to be a MSG_EOR flag issue).
2) I can't rmmod with the following config (after testing):
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS_COMMON=m
CONFIG_VHOST=m
CONFIG_VHOST_VSOCK=m
The guest is shut down, but rmmod still fails.
3) virtio_transport_init and virtio_transport_exit apparently must be
exported with EXPORT_SYMBOL_GPL(), because both are used in another module.
4) I tried to send a 5 KB piece of data (20 KB behaves the same), but got
a kernel panic in both the guest and, later, the host.
Thank You
>
> > > > >
> > > > > @@ -819,13 +942,13 @@
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> > > > > int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> > > > > struct sockaddr_vm *addr)
> > > > > {
> > > > > - return -EOPNOTSUPP;
> > > > > + return vsock_bind_stream(vsk, addr);
> > > > > }
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> > > > >
> > > > > bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > > > > {
> > > > > - return false;
> > > > > + return true;
> > > > > }
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> > > > >
> > > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct
> > > > > vsock_sock *vsk,
> > > > > struct msghdr *msg,
> > > > > size_t dgram_len)
> > > > > {
> > > > > - return -EOPNOTSUPP;
> > > > > + struct virtio_vsock_pkt_info info = {
> > > > > + .op = VIRTIO_VSOCK_OP_RW,
> > > > > + .msg = msg,
> > > > > + .pkt_len = dgram_len,
> > > > > + .vsk = vsk,
> > > > > + .remote_cid = remote_addr->svm_cid,
> > > > > + .remote_port = remote_addr->svm_port,
> > > > > + };
> > > > > +
> > > > > + return virtio_transport_send_pkt_info(vsk, &info);
> > > > > }
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> > > > >
> > > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct
> > > > > sock *sk,
> > > > > struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> > > > > int err = 0;
> > > > >
> > > > > + if (le16_to_cpu(vsock_hdr(skb)->type) ==
> > > > > VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > + virtio_transport_recv_enqueue(vsk, skb);
> > > > > + sk->sk_data_ready(sk);
> > > > > + return err;
> > > > > + }
> > > > > +
> > > > > switch (le16_to_cpu(hdr->op)) {
> > > > > case VIRTIO_VSOCK_OP_RW:
> > > > > virtio_transport_recv_enqueue(vsk, skb);
> > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct
> > > > > sock *sk, struct sk_buff *skb,
> > > > > static bool virtio_transport_valid_type(u16 type)
> > > > > {
> > > > > return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> > > > > - (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> > > > > + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> > > > > + (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > > > > }
> > > > >
> > > > > /* We are under the virtio-vsock's vsock->rx_lock or vhost-
> > > > > vsock's vq->mutex
> > > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct
> > > > > virtio_transport *t,
> > > > > goto free_pkt;
> > > > > }
> > > > >
> > > > > + if (sk->sk_type == SOCK_DGRAM) {
> > > > > + virtio_transport_recv_connected(sk, skb);
> > > > > + goto out;
> > > > > + }
> > > > > +
> > > > > space_available = virtio_transport_space_update(sk,
> > > > > skb);
> > > > >
> > > > > /* Update CID in case it has changed after a transport
> > > > > reset event */
> > > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct
> > > > > virtio_transport *t,
> > > > > break;
> > > > > }
> > > > >
> > > > > +out:
> > > > > release_sock(sk);
> > > > >
> > > > > /* Release refcnt obtained when we fetched this socket
> > > > > out of the
> > > > > --
> > > > > 2.35.1
> > > > >
> > > >
> > > > -------------------------------------------------------------
> > > > --------
> > > > To unsubscribe, e-mail:
> > > > virtio-dev-unsubscribe@lists.oasis-open.org
> > > > For additional commands, e-mail:
> > > > virtio-dev-help@lists.oasis-open.org
> > > >
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
>
On Thu, Aug 18, 2022 at 08:35:48AM +0000, Arseniy Krasnov wrote:
> On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote:
> > On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote:
> > > On 17.08.2022 08:01, Arseniy Krasnov wrote:
> > > > On 16.08.2022 05:32, Bobby Eshleman wrote:
> > > > > CC'ing virtio-dev@lists.oasis-open.org
> > > > >
> > > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> > > > > > This patch supports dgram in virtio and on the vhost side.
> > > > Hello,
> > > >
> > > > Sorry, I don't understand how this maintains message boundaries.
> > > > Or is that unnecessary for SOCK_DGRAM?
> > > >
> > > > Thanks
> > > > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> > > > > > Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> > > > > > ---
> > > > > > drivers/vhost/vsock.c | 2 +-
> > > > > > include/net/af_vsock.h | 2 +
> > > > > > include/uapi/linux/virtio_vsock.h | 1 +
> > > > > > net/vmw_vsock/af_vsock.c | 26 +++-
> > > > > > net/vmw_vsock/virtio_transport.c | 2 +-
> > > > > > net/vmw_vsock/virtio_transport_common.c | 173
> > > > > > ++++++++++++++++++++++--
> > > > > > 6 files changed, 186 insertions(+), 20 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > > > > > index a5d1bdb786fe..3dc72a5647ca 100644
> > > > > > --- a/drivers/vhost/vsock.c
> > > > > > +++ b/drivers/vhost/vsock.c
> > > > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> > > > > > int ret;
> > > > > >
> > > > > > ret = vsock_core_register(&vhost_transport.transport,
> > > > > > - VSOCK_TRANSPORT_F_H2G);
> > > > > > + VSOCK_TRANSPORT_F_H2G |
> > > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > > > if (ret < 0)
> > > > > > return ret;
> > > > > >
> > > > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> > > > > > index 1c53c4c4d88f..37e55c81e4df 100644
> > > > > > --- a/include/net/af_vsock.h
> > > > > > +++ b/include/net/af_vsock.h
> > > > > > @@ -78,6 +78,8 @@ struct vsock_sock {
> > > > > > s64 vsock_stream_has_data(struct vsock_sock *vsk);
> > > > > > s64 vsock_stream_has_space(struct vsock_sock *vsk);
> > > > > > struct sock *vsock_create_connected(struct sock *parent);
> > > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > > + struct sockaddr_vm *addr);
> > > > > >
> > > > > > /**** TRANSPORT ****/
> > > > > >
> > > > > > diff --git a/include/uapi/linux/virtio_vsock.h
> > > > > > b/include/uapi/linux/virtio_vsock.h
> > > > > > index 857df3a3a70d..0975b9c88292 100644
> > > > > > --- a/include/uapi/linux/virtio_vsock.h
> > > > > > +++ b/include/uapi/linux/virtio_vsock.h
> > > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> > > > > > enum virtio_vsock_type {
> > > > > > VIRTIO_VSOCK_TYPE_STREAM = 1,
> > > > > > VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> > > > > > + VIRTIO_VSOCK_TYPE_DGRAM = 3,
> > > > > > };
> > > > > >
> > > > > > enum virtio_vsock_op {
> > > > > > diff --git a/net/vmw_vsock/af_vsock.c
> > > > > > b/net/vmw_vsock/af_vsock.c
> > > > > > index 1893f8aafa48..87e4ae1866d3 100644
> > > > > > --- a/net/vmw_vsock/af_vsock.c
> > > > > > +++ b/net/vmw_vsock/af_vsock.c
> > > > > > @@ -675,6 +675,19 @@ static int
> > > > > > __vsock_bind_connectible(struct vsock_sock *vsk,
> > > > > > return 0;
> > > > > > }
> > > > > >
> > > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > > + struct sockaddr_vm *addr)
> > > > > > +{
> > > > > > + int retval;
> > > > > > +
> > > > > > + spin_lock_bh(&vsock_table_lock);
> > > > > > + retval = __vsock_bind_connectible(vsk, addr);
> > > > > > + spin_unlock_bh(&vsock_table_lock);
> > > > > > +
> > > > > > + return retval;
> > > > > > +}
> > > > > > +EXPORT_SYMBOL(vsock_bind_stream);
> > > > > > +
> > > > > > static int __vsock_bind_dgram(struct vsock_sock *vsk,
> > > > > > struct sockaddr_vm *addr)
> > > > > > {
> > > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct
> > > > > > vsock_transport *t, int features)
> > > > > > }
> > > > > >
> > > > > > if (features & VSOCK_TRANSPORT_F_DGRAM) {
> > > > > > - if (t_dgram) {
> > > > > > - err = -EBUSY;
> > > > > > - goto err_busy;
> > > > > > + /* TODO: always chose the G2H variant over
> > > > > > others, support nesting later */
> > > > > > + if (features & VSOCK_TRANSPORT_F_G2H) {
> > > > > > + if (t_dgram)
> > > > > > + pr_warn("virtio_vsock: t_dgram
> > > > > > already set\n");
> > > > > > + t_dgram = t;
> > > > > > + }
> > > > > > +
> > > > > > + if (!t_dgram) {
> > > > > > + t_dgram = t;
> > > > > > }
> > > > > > - t_dgram = t;
> > > > > > }
> > > > > >
> > > > > > if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > > > > diff --git a/net/vmw_vsock/virtio_transport.c
> > > > > > b/net/vmw_vsock/virtio_transport.c
> > > > > > index 073314312683..d4526ca462d2 100644
> > > > > > --- a/net/vmw_vsock/virtio_transport.c
> > > > > > +++ b/net/vmw_vsock/virtio_transport.c
> > > > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> > > > > > return -ENOMEM;
> > > > > >
> > > > > > ret = vsock_core_register(&virtio_transport.transport,
> > > > > > - VSOCK_TRANSPORT_F_G2H);
> > > > > > + VSOCK_TRANSPORT_F_G2H |
> > > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > > > if (ret)
> > > > > > goto out_wq;
> > > > > >
> > > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c
> > > > > > b/net/vmw_vsock/virtio_transport_common.c
> > > > > > index bdf16fff054f..aedb48728677 100644
> > > > > > --- a/net/vmw_vsock/virtio_transport_common.c
> > > > > > +++ b/net/vmw_vsock/virtio_transport_common.c
> > > > > > @@ -229,7 +229,9 @@
> > > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> > > > > >
> > > > > > static u16 virtio_transport_get_type(struct sock *sk)
> > > > > > {
> > > > > > - if (sk->sk_type == SOCK_STREAM)
> > > > > > + if (sk->sk_type == SOCK_DGRAM)
> > > > > > + return VIRTIO_VSOCK_TYPE_DGRAM;
> > > > > > + else if (sk->sk_type == SOCK_STREAM)
> > > > > > return VIRTIO_VSOCK_TYPE_STREAM;
> > > > > > else
> > > > > > return VIRTIO_VSOCK_TYPE_SEQPACKET;
> > > > > > @@ -287,22 +289,29 @@ static int
> > > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> > > > > > vvs = vsk->trans;
> > > > > >
> > > > > > /* we can send less than pkt_len bytes */
> > > > > > - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> > > > > > - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > > + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > > + pkt_len =
> > > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > > + else
> > > > > > + return 0;
> > > > > > + }
> > > > > >
> > > > > > - /* virtio_transport_get_credit might return less than
> > > > > > pkt_len credit */
> > > > > > - pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > > + /* virtio_transport_get_credit might return
> > > > > > less than pkt_len credit */
> > > > > > + pkt_len = virtio_transport_get_credit(vvs,
> > > > > > pkt_len);
> > > > > >
> > > > > > - /* Do not send zero length OP_RW pkt */
> > > > > > - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> > > > > > - return pkt_len;
> > > > > > + /* Do not send zero length OP_RW pkt */
> > > > > > + if (pkt_len == 0 && info->op ==
> > > > > > VIRTIO_VSOCK_OP_RW)
> > > > > > + return pkt_len;
> > > > > > + }
> > > > > >
> > > > > > skb = virtio_transport_alloc_skb(info, pkt_len,
> > > > > > src_cid, src_port,
> > > > > > dst_cid, dst_port,
> > > > > > &err);
> > > > > > if (!skb) {
> > > > > > - virtio_transport_put_credit(vvs, pkt_len);
> > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > > + virtio_transport_put_credit(vvs,
> > > > > > pkt_len);
> > > > > > return err;
> > > > > > }
> > > > > >
> > > > > > @@ -586,6 +595,61 @@
> > > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> > > > > > }
> > > > > > EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> > > > > >
> > > > > > +static ssize_t
> > > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> > > > > > + struct msghdr *msg, size_t
> > > > > > len)
> > > > > > +{
> > > > > > + struct virtio_vsock_sock *vvs = vsk->trans;
> > > > > > + struct sk_buff *skb;
> > > > > > + size_t total = 0;
> > > > > > + u32 free_space;
> > > > > > + int err = -EFAULT;
> > > > > > +
> > > > > > + spin_lock_bh(&vvs->rx_lock);
> > > > > > + if (total < len && !skb_queue_empty_lockless(&vvs-
> > > > > > >rx_queue)) {
> > > > > > + skb = __skb_dequeue(&vvs->rx_queue);
> > > > > > +
> > > > > > + total = len;
> > > > > > + if (total > skb->len - vsock_metadata(skb)-
> > > > > > >off)
> > > > > > + total = skb->len - vsock_metadata(skb)-
> > > > > > >off;
> > > > > > + else if (total < skb->len -
> > > > > > vsock_metadata(skb)->off)
> > > > > > + msg->msg_flags |= MSG_TRUNC;
> > > > > > +
> > > > > > + /* sk_lock is held by caller so no one else can
> > > > > > dequeue.
> > > > > > + * Unlock rx_lock since memcpy_to_msg() may
> > > > > > sleep.
> > > > > > + */
> > > > > > + spin_unlock_bh(&vvs->rx_lock);
> > > > > > +
> > > > > > + err = memcpy_to_msg(msg, skb->data +
> > > > > > vsock_metadata(skb)->off, total);
> > > > > > + if (err)
> > > > > > + return err;
> > > > > > +
> > > > > > + spin_lock_bh(&vvs->rx_lock);
> > > > > > +
> > > > > > + virtio_transport_dec_rx_pkt(vvs, skb);
> > > > > > + consume_skb(skb);
> > > > > > + }
> > > > > > +
> > > > > > + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs-
> > > > > > >last_fwd_cnt);
> > > > > > +
> > > > > > + spin_unlock_bh(&vvs->rx_lock);
> > > > > > +
> > > > > > + if (total > 0 && msg->msg_name) {
> > > > > > + /* Provide the address of the sender. */
> > > > > > + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr,
> > > > > > msg->msg_name);
> > > > > > +
> > > > > > + vsock_addr_init(vm_addr,
> > > > > > le64_to_cpu(vsock_hdr(skb)->src_cid),
> > > > > > + le32_to_cpu(vsock_hdr(skb)-
> > > > > > >src_port));
> > > > > > + msg->msg_namelen = sizeof(*vm_addr);
> > > > > > + }
> > > > > > + return total;
> > > > > > +}
> > > > > > +
> > > > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock
> > > > > > *vsk)
> > > > > > +{
> > > > > > + return virtio_transport_stream_has_data(vsk);
> > > > > > +}
> > > > > > +
> > > > > > int
> > > > > > virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> > > > > > struct msghdr *msg,
> > > > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct
> > > > > > vsock_sock *vsk,
> > > > > > struct msghdr *msg,
> > > > > > size_t len, int flags)
> > > > > > {
> > > > > > - return -EOPNOTSUPP;
> > > > > > + struct sock *sk;
> > > > > > + size_t err = 0;
> > > > > > + long timeout;
> > > > > > +
> > > > > > + DEFINE_WAIT(wait);
> > > > > > +
> > > > > > + sk = &vsk->sk;
> > > > > > + err = 0;
> > > > > > +
> > > > > > + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags &
> > > > > > MSG_PEEK)
> > > > > > + return -EOPNOTSUPP;
> > > > > > +
> > > > > > + lock_sock(sk);
> > > > > > +
> > > > > > + if (!len)
> > > > > > + goto out;
> > > > > > +
> > > > > > + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> > > > > > +
> > > > > > + while (1) {
> > > > > > + s64 ready;
> > > > > > +
> > > > > > + prepare_to_wait(sk_sleep(sk), &wait,
> > > > > > TASK_INTERRUPTIBLE);
> > > > > > + ready = virtio_transport_dgram_has_data(vsk);
> > > > > > +
> > > > > > + if (ready == 0) {
> > > > > > + if (timeout == 0) {
> > > > > > + err = -EAGAIN;
> > > > > > + finish_wait(sk_sleep(sk),
> > > > > > &wait);
> > > > > > + break;
> > > > > > + }
> > > > > > +
> > > > > > + release_sock(sk);
> > > > > > + timeout = schedule_timeout(timeout);
> > > > > > + lock_sock(sk);
> > > > > > +
> > > > > > + if (signal_pending(current)) {
> > > > > > + err = sock_intr_errno(timeout);
> > > > > > + finish_wait(sk_sleep(sk),
> > > > > > &wait);
> > > > > > + break;
> > > > > > + } else if (timeout == 0) {
> > > > > > + err = -EAGAIN;
> > > > > > + finish_wait(sk_sleep(sk),
> > > > > > &wait);
> > > > > > + break;
> > > > > > + }
> > > > > > + } else {
> > > > > > + finish_wait(sk_sleep(sk), &wait);
> > > > > > +
> > > > > > + if (ready < 0) {
> > > > > > + err = -ENOMEM;
> > > > > > + goto out;
> > > > > > + }
> > > > > > +
> > > > > > + err =
> > > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len);
> > > > > > + break;
> > > > > > + }
> > > > > > + }
> > > > > > +out:
> > > > > > + release_sock(sk);
> > > > > > + return err;
> > > > > > }
> > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> > > ^^^
> > > Maybe this generic data-waiting logic should live in af_vsock.c, as
> > > it does for stream/seqpacket?
> > > That way, another transport that supports SOCK_DGRAM could
> > > reuse it.
> >
> > I think that is a great idea. I'll test that change for v2.
> >
> > Thanks.
>
> Also for v2, I tested your patchset a little (writing here so as not to
> spread this over all the mails):
> 1) The seqpacket test in vsock_test.c fails (seems to be a MSG_EOR flag issue).
I will investigate.
> 2) I can't rmmod with the following config (after testing):
> CONFIG_VSOCKETS=m
> CONFIG_VIRTIO_VSOCKETS=m
> CONFIG_VIRTIO_VSOCKETS_COMMON=m
> CONFIG_VHOST=m
> CONFIG_VHOST_VSOCK=m
> The guest is shut down, but rmmod still fails.
> 3) virtio_transport_init and virtio_transport_exit apparently must be
> exported with EXPORT_SYMBOL_GPL(), because both are used in another module.
Definitely, will fix.
> 4) I tried to send a 5 KB piece of data (20 KB behaves the same), but got
> a kernel panic in both the guest and, later, the host.
>
Thanks for catching that. I can reproduce it intermittently, but only
for seqpacket. Did you happen to see this for other socket types as
well?
Thanks
> Thank You
> >
> > > > > >
> > > > > > @@ -819,13 +942,13 @@
> > > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> > > > > > int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> > > > > > struct sockaddr_vm *addr)
> > > > > > {
> > > > > > - return -EOPNOTSUPP;
> > > > > > + return vsock_bind_stream(vsk, addr);
> > > > > > }
> > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> > > > > >
> > > > > > bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > > > > > {
> > > > > > - return false;
> > > > > > + return true;
> > > > > > }
> > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> > > > > >
> > > > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct
> > > > > > vsock_sock *vsk,
> > > > > > struct msghdr *msg,
> > > > > > size_t dgram_len)
> > > > > > {
> > > > > > - return -EOPNOTSUPP;
> > > > > > + struct virtio_vsock_pkt_info info = {
> > > > > > + .op = VIRTIO_VSOCK_OP_RW,
> > > > > > + .msg = msg,
> > > > > > + .pkt_len = dgram_len,
> > > > > > + .vsk = vsk,
> > > > > > + .remote_cid = remote_addr->svm_cid,
> > > > > > + .remote_port = remote_addr->svm_port,
> > > > > > + };
> > > > > > +
> > > > > > + return virtio_transport_send_pkt_info(vsk, &info);
> > > > > > }
> > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> > > > > >
> > > > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct
> > > > > > sock *sk,
> > > > > > struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> > > > > > int err = 0;
> > > > > >
> > > > > > + if (le16_to_cpu(vsock_hdr(skb)->type) ==
> > > > > > VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > > + virtio_transport_recv_enqueue(vsk, skb);
> > > > > > + sk->sk_data_ready(sk);
> > > > > > + return err;
> > > > > > + }
> > > > > > +
> > > > > > switch (le16_to_cpu(hdr->op)) {
> > > > > > case VIRTIO_VSOCK_OP_RW:
> > > > > > virtio_transport_recv_enqueue(vsk, skb);
> > > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct
> > > > > > sock *sk, struct sk_buff *skb,
> > > > > > static bool virtio_transport_valid_type(u16 type)
> > > > > > {
> > > > > > return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> > > > > > - (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> > > > > > + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> > > > > > + (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > > > > > }
> > > > > >
> > > > > > /* We are under the virtio-vsock's vsock->rx_lock or vhost-
> > > > > > vsock's vq->mutex
> > > > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct
> > > > > > virtio_transport *t,
> > > > > > goto free_pkt;
> > > > > > }
> > > > > >
> > > > > > + if (sk->sk_type == SOCK_DGRAM) {
> > > > > > + virtio_transport_recv_connected(sk, skb);
> > > > > > + goto out;
> > > > > > + }
> > > > > > +
> > > > > > space_available = virtio_transport_space_update(sk,
> > > > > > skb);
> > > > > >
> > > > > > /* Update CID in case it has changed after a transport
> > > > > > reset event */
> > > > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct
> > > > > > virtio_transport *t,
> > > > > > break;
> > > > > > }
> > > > > >
> > > > > > +out:
> > > > > > release_sock(sk);
> > > > > >
> > > > > > /* Release refcnt obtained when we fetched this socket
> > > > > > out of the
> > > > > > --
> > > > > > 2.35.1
> > > > > >
> > > > >
> > > > > -------------------------------------------------------------
> > > > > --------
> > > > > To unsubscribe, e-mail:
> > > > > virtio-dev-unsubscribe@lists.oasis-open.org
> > > > > For additional commands, e-mail:
> > > > > virtio-dev-help@lists.oasis-open.org
> > > > >
> >
> > ---------------------------------------------------------------------
> > To unsubscribe, e-mail: virtio-dev-unsubscribe@lists.oasis-open.org
> > For additional commands, e-mail: virtio-dev-help@lists.oasis-open.org
> >
On Tue, 2022-08-16 at 20:52 +0000, Bobby Eshleman wrote:
> On Thu, Aug 18, 2022 at 08:35:48AM +0000, Arseniy Krasnov wrote:
> > On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote:
> > > On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote:
> > > > On 17.08.2022 08:01, Arseniy Krasnov wrote:
> > > > > On 16.08.2022 05:32, Bobby Eshleman wrote:
> > > > > > CC'ing virtio-dev@lists.oasis-open.org
> > > > > >
> > > > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman
> > > > > > wrote:
> > > > > > > This patch supports dgram in virtio and on the vhost
> > > > > > > side.
> > > > > Hello,
> > > > >
> > > > > > Sorry, I don't understand how this maintains message
> > > > > > boundaries.
> > > > > > Or is that unnecessary for SOCK_DGRAM?
> > > > >
> > > > > Thanks
> > > > > > > Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
> > > > > > > Signed-off-by: Bobby Eshleman <
> > > > > > > bobby.eshleman@bytedance.com>
> > > > > > > ---
> > > > > > > drivers/vhost/vsock.c | 2 +-
> > > > > > > include/net/af_vsock.h | 2 +
> > > > > > > include/uapi/linux/virtio_vsock.h | 1 +
> > > > > > > net/vmw_vsock/af_vsock.c | 26 +++-
> > > > > > > net/vmw_vsock/virtio_transport.c | 2 +-
> > > > > > > net/vmw_vsock/virtio_transport_common.c | 173
> > > > > > > ++++++++++++++++++++++--
> > > > > > > 6 files changed, 186 insertions(+), 20 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/vhost/vsock.c
> > > > > > > b/drivers/vhost/vsock.c
> > > > > > > index a5d1bdb786fe..3dc72a5647ca 100644
> > > > > > > --- a/drivers/vhost/vsock.c
> > > > > > > +++ b/drivers/vhost/vsock.c
> > > > > > > @@ -925,7 +925,7 @@ static int __init
> > > > > > > vhost_vsock_init(void)
> > > > > > > int ret;
> > > > > > >
> > > > > > > ret = vsock_core_register(&vhost_transport.transport,
> > > > > > > - VSOCK_TRANSPORT_F_H2G);
> > > > > > > + VSOCK_TRANSPORT_F_H2G |
> > > > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > > > > if (ret < 0)
> > > > > > > return ret;
> > > > > > >
> > > > > > > diff --git a/include/net/af_vsock.h
> > > > > > > b/include/net/af_vsock.h
> > > > > > > index 1c53c4c4d88f..37e55c81e4df 100644
> > > > > > > --- a/include/net/af_vsock.h
> > > > > > > +++ b/include/net/af_vsock.h
> > > > > > > @@ -78,6 +78,8 @@ struct vsock_sock {
> > > > > > > s64 vsock_stream_has_data(struct vsock_sock *vsk);
> > > > > > > s64 vsock_stream_has_space(struct vsock_sock *vsk);
> > > > > > > struct sock *vsock_create_connected(struct sock
> > > > > > > *parent);
> > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > > > + struct sockaddr_vm *addr);
> > > > > > >
> > > > > > > /**** TRANSPORT ****/
> > > > > > >
> > > > > > > diff --git a/include/uapi/linux/virtio_vsock.h
> > > > > > > b/include/uapi/linux/virtio_vsock.h
> > > > > > > index 857df3a3a70d..0975b9c88292 100644
> > > > > > > --- a/include/uapi/linux/virtio_vsock.h
> > > > > > > +++ b/include/uapi/linux/virtio_vsock.h
> > > > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> > > > > > > enum virtio_vsock_type {
> > > > > > > VIRTIO_VSOCK_TYPE_STREAM = 1,
> > > > > > > VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> > > > > > > + VIRTIO_VSOCK_TYPE_DGRAM = 3,
> > > > > > > };
> > > > > > >
> > > > > > > enum virtio_vsock_op {
> > > > > > > diff --git a/net/vmw_vsock/af_vsock.c
> > > > > > > b/net/vmw_vsock/af_vsock.c
> > > > > > > index 1893f8aafa48..87e4ae1866d3 100644
> > > > > > > --- a/net/vmw_vsock/af_vsock.c
> > > > > > > +++ b/net/vmw_vsock/af_vsock.c
> > > > > > > @@ -675,6 +675,19 @@ static int
> > > > > > > __vsock_bind_connectible(struct vsock_sock *vsk,
> > > > > > > return 0;
> > > > > > > }
> > > > > > >
> > > > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > > > + struct sockaddr_vm *addr)
> > > > > > > +{
> > > > > > > + int retval;
> > > > > > > +
> > > > > > > + spin_lock_bh(&vsock_table_lock);
> > > > > > > + retval = __vsock_bind_connectible(vsk, addr);
> > > > > > > + spin_unlock_bh(&vsock_table_lock);
> > > > > > > +
> > > > > > > + return retval;
> > > > > > > +}
> > > > > > > +EXPORT_SYMBOL(vsock_bind_stream);
> > > > > > > +
> > > > > > > static int __vsock_bind_dgram(struct vsock_sock *vsk,
> > > > > > > struct sockaddr_vm *addr)
> > > > > > > {
> > > > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const
> > > > > > > struct
> > > > > > > vsock_transport *t, int features)
> > > > > > > }
> > > > > > >
> > > > > > > if (features & VSOCK_TRANSPORT_F_DGRAM) {
> > > > > > > - if (t_dgram) {
> > > > > > > - err = -EBUSY;
> > > > > > > - goto err_busy;
> > > > > > > + /* TODO: always chose the G2H variant over
> > > > > > > others, support nesting later */
> > > > > > > + if (features & VSOCK_TRANSPORT_F_G2H) {
> > > > > > > + if (t_dgram)
> > > > > > > + pr_warn("virtio_vsock: t_dgram
> > > > > > > already set\n");
> > > > > > > + t_dgram = t;
> > > > > > > + }
> > > > > > > +
> > > > > > > + if (!t_dgram) {
> > > > > > > + t_dgram = t;
> > > > > > > }
> > > > > > > - t_dgram = t;
> > > > > > > }
> > > > > > >
> > > > > > > if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > > > > > diff --git a/net/vmw_vsock/virtio_transport.c
> > > > > > > b/net/vmw_vsock/virtio_transport.c
> > > > > > > index 073314312683..d4526ca462d2 100644
> > > > > > > --- a/net/vmw_vsock/virtio_transport.c
> > > > > > > +++ b/net/vmw_vsock/virtio_transport.c
> > > > > > > @@ -850,7 +850,7 @@ static int __init
> > > > > > > virtio_vsock_init(void)
> > > > > > > return -ENOMEM;
> > > > > > >
> > > > > > > ret = vsock_core_register(&virtio_transport.transport,
> > > > > > > - VSOCK_TRANSPORT_F_G2H);
> > > > > > > + VSOCK_TRANSPORT_F_G2H |
> > > > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > > > > if (ret)
> > > > > > > goto out_wq;
> > > > > > >
> > > > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c
> > > > > > > b/net/vmw_vsock/virtio_transport_common.c
> > > > > > > index bdf16fff054f..aedb48728677 100644
> > > > > > > --- a/net/vmw_vsock/virtio_transport_common.c
> > > > > > > +++ b/net/vmw_vsock/virtio_transport_common.c
> > > > > > > @@ -229,7 +229,9 @@
> > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> > > > > > >
> > > > > > > static u16 virtio_transport_get_type(struct sock *sk)
> > > > > > > {
> > > > > > > - if (sk->sk_type == SOCK_STREAM)
> > > > > > > + if (sk->sk_type == SOCK_DGRAM)
> > > > > > > + return VIRTIO_VSOCK_TYPE_DGRAM;
> > > > > > > + else if (sk->sk_type == SOCK_STREAM)
> > > > > > > return VIRTIO_VSOCK_TYPE_STREAM;
> > > > > > > else
> > > > > > > return VIRTIO_VSOCK_TYPE_SEQPACKET;
> > > > > > > @@ -287,22 +289,29 @@ static int
> > > > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> > > > > > > vvs = vsk->trans;
> > > > > > >
> > > > > > > /* we can send less than pkt_len bytes */
> > > > > > > - if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> > > > > > > - pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > > > + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> > > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > > > + pkt_len =
> > > > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > > > + else
> > > > > > > + return 0;
> > > > > > > + }
> > > > > > >
> > > > > > > - /* virtio_transport_get_credit might return less than
> > > > > > > pkt_len credit */
> > > > > > > - pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> > > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > > > + /* virtio_transport_get_credit might return
> > > > > > > less than pkt_len credit */
> > > > > > > + pkt_len = virtio_transport_get_credit(vvs,
> > > > > > > pkt_len);
> > > > > > >
> > > > > > > - /* Do not send zero length OP_RW pkt */
> > > > > > > - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> > > > > > > - return pkt_len;
> > > > > > > + /* Do not send zero length OP_RW pkt */
> > > > > > > + if (pkt_len == 0 && info->op ==
> > > > > > > VIRTIO_VSOCK_OP_RW)
> > > > > > > + return pkt_len;
> > > > > > > + }
> > > > > > >
> > > > > > > skb = virtio_transport_alloc_skb(info, pkt_len,
> > > > > > > src_cid, src_port,
> > > > > > > dst_cid, dst_port,
> > > > > > > &err);
> > > > > > > if (!skb) {
> > > > > > > - virtio_transport_put_credit(vvs, pkt_len);
> > > > > > > + if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > > > + virtio_transport_put_credit(vvs,
> > > > > > > pkt_len);
> > > > > > > return err;
> > > > > > > }
> > > > > > >
> > > > > > > @@ -586,6 +595,61 @@
> > > > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock
> > > > > > > *vsk,
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> > > > > > >
> > > > > > > +static ssize_t
> > > > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock
> > > > > > > *vsk,
> > > > > > > + struct msghdr *msg, size_t
> > > > > > > len)
> > > > > > > +{
> > > > > > > + struct virtio_vsock_sock *vvs = vsk->trans;
> > > > > > > + struct sk_buff *skb;
> > > > > > > + size_t total = 0;
> > > > > > > + u32 free_space;
> > > > > > > + int err = -EFAULT;
> > > > > > > +
> > > > > > > + spin_lock_bh(&vvs->rx_lock);
> > > > > > > + if (total < len && !skb_queue_empty_lockless(&vvs-
> > > > > > > > rx_queue)) {
> > > > > > > + skb = __skb_dequeue(&vvs->rx_queue);
> > > > > > > +
> > > > > > > + total = len;
> > > > > > > + if (total > skb->len - vsock_metadata(skb)-
> > > > > > > > off)
> > > > > > > + total = skb->len - vsock_metadata(skb)-
> > > > > > > > off;
> > > > > > > + else if (total < skb->len -
> > > > > > > vsock_metadata(skb)->off)
> > > > > > > + msg->msg_flags |= MSG_TRUNC;
> > > > > > > +
> > > > > > > + /* sk_lock is held by caller so no one else can
> > > > > > > dequeue.
> > > > > > > + * Unlock rx_lock since memcpy_to_msg() may
> > > > > > > sleep.
> > > > > > > + */
> > > > > > > + spin_unlock_bh(&vvs->rx_lock);
> > > > > > > +
> > > > > > > + err = memcpy_to_msg(msg, skb->data +
> > > > > > > vsock_metadata(skb)->off, total);
> > > > > > > + if (err)
> > > > > > > + return err;
> > > > > > > +
> > > > > > > + spin_lock_bh(&vvs->rx_lock);
> > > > > > > +
> > > > > > > + virtio_transport_dec_rx_pkt(vvs, skb);
> > > > > > > + consume_skb(skb);
> > > > > > > + }
> > > > > > > +
> > > > > > > + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs-
> > > > > > > > last_fwd_cnt);
> > > > > > > +
> > > > > > > + spin_unlock_bh(&vvs->rx_lock);
> > > > > > > +
> > > > > > > + if (total > 0 && msg->msg_name) {
> > > > > > > + /* Provide the address of the sender. */
> > > > > > > + DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr,
> > > > > > > msg->msg_name);
> > > > > > > +
> > > > > > > + vsock_addr_init(vm_addr,
> > > > > > > le64_to_cpu(vsock_hdr(skb)->src_cid),
> > > > > > > + le32_to_cpu(vsock_hdr(skb)-
> > > > > > > > src_port));
> > > > > > > + msg->msg_namelen = sizeof(*vm_addr);
> > > > > > > + }
> > > > > > > + return total;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static s64 virtio_transport_dgram_has_data(struct
> > > > > > > vsock_sock
> > > > > > > *vsk)
> > > > > > > +{
> > > > > > > + return virtio_transport_stream_has_data(vsk);
> > > > > > > +}
> > > > > > > +
> > > > > > > int
> > > > > > > virtio_transport_seqpacket_enqueue(struct vsock_sock
> > > > > > > *vsk,
> > > > > > > struct msghdr *msg,
> > > > > > > @@ -611,7 +675,66 @@
> > > > > > > virtio_transport_dgram_dequeue(struct
> > > > > > > vsock_sock *vsk,
> > > > > > > struct msghdr *msg,
> > > > > > > size_t len, int flags)
> > > > > > > {
> > > > > > > - return -EOPNOTSUPP;
> > > > > > > + struct sock *sk;
> > > > > > > + size_t err = 0;
> > > > > > > + long timeout;
> > > > > > > +
> > > > > > > + DEFINE_WAIT(wait);
> > > > > > > +
> > > > > > > + sk = &vsk->sk;
> > > > > > > + err = 0;
> > > > > > > +
> > > > > > > + if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags &
> > > > > > > MSG_PEEK)
> > > > > > > + return -EOPNOTSUPP;
> > > > > > > +
> > > > > > > + lock_sock(sk);
> > > > > > > +
> > > > > > > + if (!len)
> > > > > > > + goto out;
> > > > > > > +
> > > > > > > + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> > > > > > > +
> > > > > > > + while (1) {
> > > > > > > + s64 ready;
> > > > > > > +
> > > > > > > + prepare_to_wait(sk_sleep(sk), &wait,
> > > > > > > TASK_INTERRUPTIBLE);
> > > > > > > + ready = virtio_transport_dgram_has_data(vsk);
> > > > > > > +
> > > > > > > + if (ready == 0) {
> > > > > > > + if (timeout == 0) {
> > > > > > > + err = -EAGAIN;
> > > > > > > + finish_wait(sk_sleep(sk),
> > > > > > > &wait);
> > > > > > > + break;
> > > > > > > + }
> > > > > > > +
> > > > > > > + release_sock(sk);
> > > > > > > + timeout = schedule_timeout(timeout);
> > > > > > > + lock_sock(sk);
> > > > > > > +
> > > > > > > + if (signal_pending(current)) {
> > > > > > > + err = sock_intr_errno(timeout);
> > > > > > > + finish_wait(sk_sleep(sk),
> > > > > > > &wait);
> > > > > > > + break;
> > > > > > > + } else if (timeout == 0) {
> > > > > > > + err = -EAGAIN;
> > > > > > > + finish_wait(sk_sleep(sk),
> > > > > > > &wait);
> > > > > > > + break;
> > > > > > > + }
> > > > > > > + } else {
> > > > > > > + finish_wait(sk_sleep(sk), &wait);
> > > > > > > +
> > > > > > > + if (ready < 0) {
> > > > > > > + err = -ENOMEM;
> > > > > > > + goto out;
> > > > > > > + }
> > > > > > > +
> > > > > > > + err =
> > > > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len);
> > > > > > > + break;
> > > > > > > + }
> > > > > > > + }
> > > > > > > +out:
> > > > > > > + release_sock(sk);
> > > > > > > + return err;
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> > > > ^^^
> > > > Maybe this generic data-waiting logic should live in
> > > > af_vsock.c, as it does
> > > > for stream/seqpacket?
> > > > That way, another transport which supports SOCK_DGRAM could
> > > > reuse it.
> > >
> > > I think that is a great idea. I'll test that change for v2.
> > >
> > > Thanks.
> >
> > Also for v2, I tested your patchset a little bit (writing it here so
> > as not to spread it over all the mails):
> > 1) The seqpacket test in vsock_test.c fails (seems to be a MSG_EOR flag issue)
>
> I will investigate.
>
> > 2) I can't do rmmod with the following config (after testing):
> > CONFIG_VSOCKETS=m
> > CONFIG_VIRTIO_VSOCKETS=m
> > CONFIG_VIRTIO_VSOCKETS_COMMON=m
> > CONFIG_VHOST=m
> > CONFIG_VHOST_VSOCK=m
> > The guest is shut down, but rmmod fails.
> > 3) virtio_transport_init and virtio_transport_exit apparently must be
> > exported with EXPORT_SYMBOL_GPL(), because both are used in another module.
>
> Definitely, will fix.
>
> > 4) I tried to send a 5 KB (or 20 KB, it doesn't matter) piece of data, but
> > got
> > a kernel panic both in the guest and later in the host.
> >
>
> Thanks for catching that. I can reproduce it intermittently, but only
> for seqpacket. Did you happen to see this for other socket types as
> well?
>
> Thanks
I got this for SOCK_DGRAM; I didn't test seqpacket or stream.
Thanks, Arseniy
>
> > Thank You
> > > > > > >
> > > > > > > @@ -819,13 +942,13 @@
> > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> > > > > > > int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> > > > > > > struct sockaddr_vm *addr)
> > > > > > > {
> > > > > > > - return -EOPNOTSUPP;
> > > > > > > + return vsock_bind_stream(vsk, addr);
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> > > > > > >
> > > > > > > bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > > > > > > {
> > > > > > > - return false;
> > > > > > > + return true;
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> > > > > > >
> > > > > > > @@ -861,7 +984,16 @@
> > > > > > > virtio_transport_dgram_enqueue(struct
> > > > > > > vsock_sock *vsk,
> > > > > > > struct msghdr *msg,
> > > > > > > size_t dgram_len)
> > > > > > > {
> > > > > > > - return -EOPNOTSUPP;
> > > > > > > + struct virtio_vsock_pkt_info info = {
> > > > > > > + .op = VIRTIO_VSOCK_OP_RW,
> > > > > > > + .msg = msg,
> > > > > > > + .pkt_len = dgram_len,
> > > > > > > + .vsk = vsk,
> > > > > > > + .remote_cid = remote_addr->svm_cid,
> > > > > > > + .remote_port = remote_addr->svm_port,
> > > > > > > + };
> > > > > > > +
> > > > > > > + return virtio_transport_send_pkt_info(vsk, &info);
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> > > > > > >
> > > > > > > @@ -1165,6 +1297,12 @@
> > > > > > > virtio_transport_recv_connected(struct
> > > > > > > sock *sk,
> > > > > > > struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> > > > > > > int err = 0;
> > > > > > >
> > > > > > > + if (le16_to_cpu(vsock_hdr(skb)->type) ==
> > > > > > > VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > > > + virtio_transport_recv_enqueue(vsk, skb);
> > > > > > > + sk->sk_data_ready(sk);
> > > > > > > + return err;
> > > > > > > + }
> > > > > > > +
> > > > > > > switch (le16_to_cpu(hdr->op)) {
> > > > > > > case VIRTIO_VSOCK_OP_RW:
> > > > > > > virtio_transport_recv_enqueue(vsk, skb);
> > > > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct
> > > > > > > sock *sk, struct sk_buff *skb,
> > > > > > > static bool virtio_transport_valid_type(u16 type)
> > > > > > > {
> > > > > > > return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> > > > > > > - (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> > > > > > > + (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> > > > > > > + (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > > > > > > }
> > > > > > >
> > > > > > > /* We are under the virtio-vsock's vsock->rx_lock or
> > > > > > > vhost-
> > > > > > > vsock's vq->mutex
> > > > > > > @@ -1384,6 +1523,11 @@ void
> > > > > > > virtio_transport_recv_pkt(struct
> > > > > > > virtio_transport *t,
> > > > > > > goto free_pkt;
> > > > > > > }
> > > > > > >
> > > > > > > + if (sk->sk_type == SOCK_DGRAM) {
> > > > > > > + virtio_transport_recv_connected(sk, skb);
> > > > > > > + goto out;
> > > > > > > + }
> > > > > > > +
> > > > > > > space_available = virtio_transport_space_update(sk,
> > > > > > > skb);
> > > > > > >
> > > > > > > /* Update CID in case it has changed after a transport
> > > > > > > reset event */
> > > > > > > @@ -1415,6 +1559,7 @@ void
> > > > > > > virtio_transport_recv_pkt(struct
> > > > > > > virtio_transport *t,
> > > > > > > break;
> > > > > > > }
> > > > > > >
> > > > > > > +out:
> > > > > > > release_sock(sk);
> > > > > > >
> > > > > > > /* Release refcnt obtained when we fetched this socket
> > > > > > > out of the
> > > > > > > --
> > > > > > > 2.35.1
> > > > > > >
> > > > > >
> > > > > > ---------------------------------------------------------
> > > > > > ----
> > > > > > --------
> > > > > > To unsubscribe, e-mail:
> > > > > > virtio-dev-unsubscribe@lists.oasis-open.org
> > > > > > For additional commands, e-mail:
> > > > > > virtio-dev-help@lists.oasis-open.org
> > > > > >
> > >
> > > ---------------------------------------------------------------
> > > ------
> > > To unsubscribe, e-mail:
> > > virtio-dev-unsubscribe@lists.oasis-open.org
> > > For additional commands, e-mail:
> > > virtio-dev-help@lists.oasis-open.org
> > >
Hi Bobby,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on mst-vhost/linux-next]
[also build test WARNING on linus/master v6.0-rc1 next-20220815]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Bobby-Eshleman/virtio-vsock-introduce-dgrams-sk_buff-and-qdisc/20220816-015812
base: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20220816/202208160405.cG02E3MZ-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/cbb332da78c86ac574688831ed6f404d04d506db
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Bobby-Eshleman/virtio-vsock-introduce-dgrams-sk_buff-and-qdisc/20220816-015812
git checkout cbb332da78c86ac574688831ed6f404d04d506db
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash net/vmw_vsock/
If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
net/vmw_vsock/virtio_transport_common.c: In function 'virtio_transport_dgram_do_dequeue':
>> net/vmw_vsock/virtio_transport_common.c:605:13: warning: variable 'free_space' set but not used [-Wunused-but-set-variable]
605 | u32 free_space;
| ^~~~~~~~~~
vim +/free_space +605 net/vmw_vsock/virtio_transport_common.c
597
598 static ssize_t
599 virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
600 struct msghdr *msg, size_t len)
601 {
602 struct virtio_vsock_sock *vvs = vsk->trans;
603 struct sk_buff *skb;
604 size_t total = 0;
> 605 u32 free_space;
606 int err = -EFAULT;
607
608 spin_lock_bh(&vvs->rx_lock);
609 if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
610 skb = __skb_dequeue(&vvs->rx_queue);
611
612 total = len;
613 if (total > skb->len - vsock_metadata(skb)->off)
614 total = skb->len - vsock_metadata(skb)->off;
615 else if (total < skb->len - vsock_metadata(skb)->off)
616 msg->msg_flags |= MSG_TRUNC;
617
618 /* sk_lock is held by caller so no one else can dequeue.
619 * Unlock rx_lock since memcpy_to_msg() may sleep.
620 */
621 spin_unlock_bh(&vvs->rx_lock);
622
623 err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, total);
624 if (err)
625 return err;
626
627 spin_lock_bh(&vvs->rx_lock);
628
629 virtio_transport_dec_rx_pkt(vvs, skb);
630 consume_skb(skb);
631 }
632
633 free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
634
635 spin_unlock_bh(&vvs->rx_lock);
636
637 if (total > 0 && msg->msg_name) {
638 /* Provide the address of the sender. */
639 DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
640
641 vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
642 le32_to_cpu(vsock_hdr(skb)->src_port));
643 msg->msg_namelen = sizeof(*vm_addr);
644 }
645 return total;
646 }
647
--
0-DAY CI Kernel Test Service
https://01.org/lkp
© 2016 - 2026 Red Hat, Inc.