[PATCH net-next v6 3/9] vsock: add netns to vsock core

Bobby Eshleman posted 9 patches 2 weeks, 1 day ago
[PATCH net-next v6 3/9] vsock: add netns to vsock core
Posted by Bobby Eshleman 2 weeks, 1 day ago
From: Bobby Eshleman <bobbyeshleman@meta.com>

Add netns logic to vsock core. Additionally, modify transport hook
prototypes (e.g., *_seqpacket_allow()) so that they can be used by later
transport-specific patches.

Namespaces are supported primarily by changing socket lookup functions
(e.g., vsock_find_connected_socket()) to take into account the socket
namespace and the namespace mode before considering a candidate socket a
"match".

Introduce a dummy namespace struct, __vsock_global_dummy_net, to be
used by transports that do not support namespacing. This dummy always
has mode "global" to preserve previous CID behavior.

This patch also introduces the sysctl /proc/sys/net/vsock/ns_mode that
accepts the "global" or "local" mode strings.

The transports (besides vhost) are modified to use the global dummy.

Add netns functionality (initialization, passing to transports, procfs,
etc...) to the af_vsock socket layer. Later patches that add netns
support to transports depend on this patch.

Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>

---
Changes in v6:
- unregister sysctl ops in vsock_exit()
- af_vsock: clarify description of CID behavior
- af_vsock: fix buf vs buffer naming, and length checking
- af_vsock: fix length checking w/ correct ctl_table->maxlen

Changes in v5:
- vsock_global_net() -> vsock_global_dummy_net()
- update comments for new uAPI
- use /proc/sys/net/vsock/ns_mode instead of /proc/net/vsock_ns_mode
- add prototype changes so patch remains compilable
---
 drivers/vhost/vsock.c                   |   4 +-
 include/net/af_vsock.h                  |  15 ++-
 net/vmw_vsock/af_vsock.c                | 219 ++++++++++++++++++++++++++++++--
 net/vmw_vsock/hyperv_transport.c        |   2 +-
 net/vmw_vsock/virtio_transport.c        |   6 +-
 net/vmw_vsock/virtio_transport_common.c |   4 +-
 net/vmw_vsock/vmci_transport.c          |   6 +-
 net/vmw_vsock/vsock_loopback.c          |   4 +-
 8 files changed, 234 insertions(+), 26 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index ae01457ea2cd..34adf0cf9124 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -404,7 +404,7 @@ static bool vhost_transport_msgzerocopy_allow(void)
 	return true;
 }
 
-static bool vhost_transport_seqpacket_allow(u32 remote_cid);
+static bool vhost_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid);
 
 static struct virtio_transport vhost_transport = {
 	.transport = {
@@ -460,7 +460,7 @@ static struct virtio_transport vhost_transport = {
 	.send_pkt = vhost_transport_send_pkt,
 };
 
-static bool vhost_transport_seqpacket_allow(u32 remote_cid)
+static bool vhost_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
 {
 	struct vhost_vsock *vsock;
 	bool seqpacket_allow = false;
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 2857e97699de..628e35ae9d00 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -145,7 +145,7 @@ struct vsock_transport {
 				     int flags);
 	int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
 				 size_t len);
-	bool (*seqpacket_allow)(u32 remote_cid);
+	bool (*seqpacket_allow)(struct vsock_sock *vsk, u32 remote_cid);
 	u32 (*seqpacket_has_data)(struct vsock_sock *vsk);
 
 	/* Notification. */
@@ -215,9 +215,12 @@ void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
 void vsock_insert_connected(struct vsock_sock *vsk);
 void vsock_remove_bound(struct vsock_sock *vsk);
 void vsock_remove_connected(struct vsock_sock *vsk);
-struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr, struct net *net,
+				     enum vsock_net_mode orig_net_mode);
 struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
-					 struct sockaddr_vm *dst);
+					 struct sockaddr_vm *dst,
+					 struct net *net,
+					 enum vsock_net_mode orig_net_mode);
 void vsock_remove_sock(struct vsock_sock *vsk);
 void vsock_for_each_connected_socket(struct vsock_transport *transport,
 				     void (*fn)(struct sock *sk));
@@ -259,6 +262,12 @@ static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t)
 	return t->msgzerocopy_allow && t->msgzerocopy_allow();
 }
 
+extern struct net __vsock_global_dummy_net;
+static inline struct net *vsock_global_dummy_net(void)
+{
+	return &__vsock_global_dummy_net;
+}
+
 static inline enum vsock_net_mode vsock_net_mode(struct net *net)
 {
 	enum vsock_net_mode ret;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 0538948d5fd9..c78aba9cd20e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -83,6 +83,35 @@
  *   TCP_ESTABLISHED - connected
  *   TCP_CLOSING - disconnecting
  *   TCP_LISTEN - listening
+ *
+ * - Namespaces in vsock support two different modes configured
+ *   through /proc/sys/net/vsock/ns_mode. The modes are "local" and "global".
+ *   Each mode defines how the namespace interacts with CIDs.
+ *   /proc/sys/net/vsock/ns_mode is write-once, so that it may be configured
+ *   and locked down by a namespace manager. The default is "global". The mode
+ *   is set per-namespace.
+ *
+ *   The modes affect the allocation and accessibility of CIDs as follows:
+ *
+ *   - global - access and allocation are all system-wide
+ *     - all CID allocations from global namespaces draw from the same
+ *       system-wide pool
+ *     - if one global namespace has already allocated some CID, another
+ *       global namespace will not be able to allocate the same CID
+ *     - global mode AF_VSOCK sockets can reach any VM or socket in any global
+ *       namespace; they are not confined to their own namespace
+ *     - AF_VSOCK sockets in a global mode namespace cannot reach VMs or
+ *       sockets in any local mode namespace
+ *   - local - access and allocation are contained within the namespace
+ *     - CID allocation draws only from a private pool local only to the
+ *       namespace, and does not affect the CIDs available for allocation in any
+ *       other namespace (global or local)
+ *     - VMs in a local namespace do not collide with CIDs in any other local
+ *       namespace or any global namespace. For example, if a VM in a local mode
+ *       namespace is given CID 10, then CID 10 is still available for
+ *       allocation in any other namespace, but not in the same namespace
+ *     - AF_VSOCK sockets in a local mode namespace can connect only to VMs or
+ *       other sockets within their own namespace.
  */
 
 #include <linux/compat.h>
@@ -100,6 +129,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/net.h>
+#include <linux/proc_fs.h>
 #include <linux/poll.h>
 #include <linux/random.h>
 #include <linux/skbuff.h>
@@ -111,9 +141,14 @@
 #include <linux/workqueue.h>
 #include <net/sock.h>
 #include <net/af_vsock.h>
+#include <net/netns/vsock.h>
 #include <uapi/linux/vm_sockets.h>
 #include <uapi/asm-generic/ioctls.h>
 
+#define VSOCK_NET_MODE_STR_GLOBAL "global"
+#define VSOCK_NET_MODE_STR_LOCAL "local"
+#define VSOCK_NET_MODE_STR_MAX 8
+
 static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
 static void vsock_sk_destruct(struct sock *sk);
 static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
@@ -149,6 +184,9 @@ static const struct vsock_transport *transport_dgram;
 static const struct vsock_transport *transport_local;
 static DEFINE_MUTEX(vsock_register_mutex);
 
+struct net __vsock_global_dummy_net;
+EXPORT_SYMBOL_GPL(__vsock_global_dummy_net);
+
 /**** UTILS ****/
 
 /* Each bound VSocket is stored in the bind hash table and each connected
@@ -235,17 +273,21 @@ static void __vsock_remove_connected(struct vsock_sock *vsk)
 	sock_put(&vsk->sk);
 }
 
-static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
+static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr,
+					      struct net *net,
+					      enum vsock_net_mode orig_net_mode)
 {
 	struct vsock_sock *vsk;
 
 	list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) {
-		if (vsock_addr_equals_addr(addr, &vsk->local_addr))
+		if (vsock_addr_equals_addr(addr, &vsk->local_addr) &&
+		    vsock_net_check_mode(vsk, net, orig_net_mode))
 			return sk_vsock(vsk);
 
 		if (addr->svm_port == vsk->local_addr.svm_port &&
 		    (vsk->local_addr.svm_cid == VMADDR_CID_ANY ||
-		     addr->svm_cid == VMADDR_CID_ANY))
+		     addr->svm_cid == VMADDR_CID_ANY) &&
+		     vsock_net_check_mode(vsk, net, orig_net_mode))
 			return sk_vsock(vsk);
 	}
 
@@ -253,14 +295,17 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
 }
 
 static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
-						  struct sockaddr_vm *dst)
+						  struct sockaddr_vm *dst,
+						  struct net *net,
+						  enum vsock_net_mode orig_net_mode)
 {
 	struct vsock_sock *vsk;
 
 	list_for_each_entry(vsk, vsock_connected_sockets(src, dst),
 			    connected_table) {
 		if (vsock_addr_equals_addr(src, &vsk->remote_addr) &&
-		    dst->svm_port == vsk->local_addr.svm_port) {
+		    dst->svm_port == vsk->local_addr.svm_port &&
+		    vsock_net_check_mode(vsk, net, orig_net_mode)) {
 			return sk_vsock(vsk);
 		}
 	}
@@ -304,12 +349,13 @@ void vsock_remove_connected(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(vsock_remove_connected);
 
-struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr, struct net *net,
+				     enum vsock_net_mode orig_net_mode)
 {
 	struct sock *sk;
 
 	spin_lock_bh(&vsock_table_lock);
-	sk = __vsock_find_bound_socket(addr);
+	sk = __vsock_find_bound_socket(addr, net, orig_net_mode);
 	if (sk)
 		sock_hold(sk);
 
@@ -320,12 +366,14 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
 EXPORT_SYMBOL_GPL(vsock_find_bound_socket);
 
 struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
-					 struct sockaddr_vm *dst)
+					 struct sockaddr_vm *dst,
+					 struct net *net,
+					 enum vsock_net_mode orig_net_mode)
 {
 	struct sock *sk;
 
 	spin_lock_bh(&vsock_table_lock);
-	sk = __vsock_find_connected_socket(src, dst);
+	sk = __vsock_find_connected_socket(src, dst, net, orig_net_mode);
 	if (sk)
 		sock_hold(sk);
 
@@ -528,7 +576,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
 
 	if (sk->sk_type == SOCK_SEQPACKET) {
 		if (!new_transport->seqpacket_allow ||
-		    !new_transport->seqpacket_allow(remote_cid)) {
+		    !new_transport->seqpacket_allow(vsk, remote_cid)) {
 			module_put(new_transport->module);
 			return -ESOCKTNOSUPPORT;
 		}
@@ -676,6 +724,7 @@ static void vsock_pending_work(struct work_struct *work)
 static int __vsock_bind_connectible(struct vsock_sock *vsk,
 				    struct sockaddr_vm *addr)
 {
+	struct net *net = sock_net(sk_vsock(vsk));
 	static u32 port;
 	struct sockaddr_vm new_addr;
 
@@ -695,7 +744,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
 
 			new_addr.svm_port = port++;
 
-			if (!__vsock_find_bound_socket(&new_addr)) {
+			if (!__vsock_find_bound_socket(&new_addr, net,
+						       vsk->orig_net_mode)) {
 				found = true;
 				break;
 			}
@@ -712,7 +762,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
 			return -EACCES;
 		}
 
-		if (__vsock_find_bound_socket(&new_addr))
+		if (__vsock_find_bound_socket(&new_addr, net,
+					      vsk->orig_net_mode))
 			return -EADDRINUSE;
 	}
 
@@ -2552,6 +2603,7 @@ static int vsock_create(struct net *net, struct socket *sock,
 		return -ENOMEM;
 
 	vsk = vsock_sk(sk);
+	vsk->orig_net_mode = vsock_net_mode(net);
 
 	if (sock->type == SOCK_DGRAM) {
 		ret = vsock_assign_transport(vsk, NULL);
@@ -2636,6 +2688,153 @@ static struct miscdevice vsock_device = {
 	.fops		= &vsock_device_ops,
 };
 
+/* Handler for /proc/sys/net/vsock/ns_mode.
+ *
+ * Reads report the namespace's mode as "global" or "local". Writes accept
+ * the same two strings; an update rejected by vsock_net_write_mode() is
+ * reported as -EPERM and any other string as -EINVAL.
+ *
+ * The namespace is derived from table->data, which points at that netns'
+ * vsock.mode, rather than from current->nsproxy: the latter is the netns of
+ * the calling task, not of the sysctl file being accessed, and nsproxy can be
+ * NULL while a task is exiting.
+ */
+static int vsock_net_mode_string(const struct ctl_table *table, int write,
+				 void *buffer, size_t *lenp, loff_t *ppos)
+{
+	char data[VSOCK_NET_MODE_STR_MAX] = {0};
+	enum vsock_net_mode mode;
+	struct ctl_table tmp;
+	struct net *net;
+	int ret;
+
+	if (!table->data || !table->maxlen || !*lenp) {
+		*lenp = 0;
+		return 0;
+	}
+
+	net = container_of(table->data, struct net, vsock.mode);
+	tmp = *table;
+	tmp.data = data;
+
+	if (!write) {
+		const char *p;
+
+		mode = vsock_net_mode(net);
+
+		switch (mode) {
+		case VSOCK_NET_MODE_GLOBAL:
+			p = VSOCK_NET_MODE_STR_GLOBAL;
+			break;
+		case VSOCK_NET_MODE_LOCAL:
+			p = VSOCK_NET_MODE_STR_LOCAL;
+			break;
+		default:
+			WARN_ONCE(true, "netns has invalid vsock mode");
+			*lenp = 0;
+			return 0;
+		}
+
+		strscpy(data, p, sizeof(data));
+		tmp.maxlen = strlen(p);
+	}
+
+	ret = proc_dostring(&tmp, write, buffer, lenp, ppos);
+	if (ret)
+		return ret;
+
+	if (write) {
+		if (*lenp >= sizeof(data))
+			return -EINVAL;
+
+		if (!strncmp(data, VSOCK_NET_MODE_STR_GLOBAL, sizeof(data)))
+			mode = VSOCK_NET_MODE_GLOBAL;
+		else if (!strncmp(data, VSOCK_NET_MODE_STR_LOCAL, sizeof(data)))
+			mode = VSOCK_NET_MODE_LOCAL;
+		else
+			return -EINVAL;
+
+		if (!vsock_net_write_mode(net, mode))
+			return -EPERM;
+	}
+
+	return 0;
+}
+
+static struct ctl_table vsock_table[] = {
+	{
+		.procname	= "ns_mode",
+		.data		= &init_net.vsock.mode,
+		.maxlen		= VSOCK_NET_MODE_STR_MAX,
+		.mode		= 0644,
+		.proc_handler	= vsock_net_mode_string
+	},
+};
+
+static int __net_init vsock_sysctl_register(struct net *net)
+{
+	struct ctl_table *table;
+
+	if (net_eq(net, &init_net)) {
+		table = vsock_table;
+	} else {
+		table = kmemdup(vsock_table, sizeof(vsock_table), GFP_KERNEL);
+		if (!table)
+			goto err_alloc;
+
+		table[0].data = &net->vsock.mode;
+	}
+
+	net->vsock.vsock_hdr = register_net_sysctl_sz(net, "net/vsock", table,
+						      ARRAY_SIZE(vsock_table));
+	if (!net->vsock.vsock_hdr)
+		goto err_reg;
+
+	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void vsock_sysctl_unregister(struct net *net)
+{
+	const struct ctl_table *table;
+
+	table = net->vsock.vsock_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->vsock.vsock_hdr);
+	if (!net_eq(net, &init_net))
+		kfree(table);
+}
+
+static void vsock_net_init(struct net *net)
+{
+	spin_lock_init(&net->vsock.lock);
+	net->vsock.mode = VSOCK_NET_MODE_GLOBAL;
+}
+
+static __net_init int vsock_sysctl_init_net(struct net *net)
+{
+	vsock_net_init(net);
+
+	if (vsock_sysctl_register(net))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static __net_exit void vsock_sysctl_exit_net(struct net *net)
+{
+	vsock_sysctl_unregister(net);
+}
+
+static struct pernet_operations vsock_sysctl_ops = {
+	.init = vsock_sysctl_init_net,
+	.exit = vsock_sysctl_exit_net,
+};
+
 static int __init vsock_init(void)
 {
 	int err = 0;
@@ -2663,10 +2848,19 @@ static int __init vsock_init(void)
 		goto err_unregister_proto;
 	}
 
+	if (register_pernet_subsys(&vsock_sysctl_ops)) {
+		err = -ENOMEM;
+		goto err_unregister_sock;
+	}
+
+	vsock_net_init(&init_net);
+	vsock_net_init(vsock_global_dummy_net());
 	vsock_bpf_build_proto();
 
 	return 0;
 
+err_unregister_sock:
+	sock_unregister(AF_VSOCK);
 err_unregister_proto:
 	proto_unregister(&vsock_proto);
 err_deregister_misc:
@@ -2680,6 +2874,7 @@ static void __exit vsock_exit(void)
 	misc_deregister(&vsock_device);
 	sock_unregister(AF_VSOCK);
 	proto_unregister(&vsock_proto);
+	unregister_pernet_subsys(&vsock_sysctl_ops);
 }
 
 const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk)
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 432fcbbd14d4..79bc55eeecb3 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -313,7 +313,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
 		return;
 
 	hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
-	sk = vsock_find_bound_socket(&addr);
+	sk = vsock_find_bound_socket(&addr, vsock_global_dummy_net(),
+				     VSOCK_NET_MODE_GLOBAL);
 	if (!sk)
 		return;
 
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index b6569b0ca2bb..4626ba0428ef 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -536,7 +536,7 @@ static bool virtio_transport_msgzerocopy_allow(void)
 	return true;
 }
 
-static bool virtio_transport_seqpacket_allow(u32 remote_cid);
+static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid);
 
 static struct virtio_transport virtio_transport = {
 	.transport = {
@@ -593,7 +593,7 @@ static struct virtio_transport virtio_transport = {
 	.can_msgzerocopy = virtio_transport_can_msgzerocopy,
 };
 
-static bool virtio_transport_seqpacket_allow(u32 remote_cid)
+static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
 {
 	struct virtio_vsock *vsock;
 	bool seqpacket_allow;
@@ -659,6 +659,8 @@ static void virtio_transport_rx_work(struct work_struct *work)
 			if (payload_len)
 				virtio_vsock_skb_put(skb, payload_len);
 
+			virtio_vsock_skb_set_net(skb, vsock_global_dummy_net());
+			virtio_vsock_skb_set_orig_net_mode(skb, VSOCK_NET_MODE_GLOBAL);
 			virtio_transport_deliver_tap_pkt(skb);
 			virtio_transport_recv_pkt(&virtio_transport, skb);
 		}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index dcc8a1d5851e..1a9129e33d51 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1606,9 +1606,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
 	/* The socket must be in connected or bound table
 	 * otherwise send reset back
 	 */
-	sk = vsock_find_connected_socket(&src, &dst);
+	sk = vsock_find_connected_socket(&src, &dst, vsock_global_dummy_net(),
+					 VSOCK_NET_MODE_GLOBAL);
 	if (!sk) {
-		sk = vsock_find_bound_socket(&dst);
+		sk = vsock_find_bound_socket(&dst, vsock_global_dummy_net(),
+					     VSOCK_NET_MODE_GLOBAL);
 		if (!sk) {
 			(void)virtio_transport_reset_no_sock(t, skb);
 			goto free_pkt;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 7eccd6708d66..aa0cd2efe561 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -703,9 +703,11 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
 	vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
 	vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
 
-	sk = vsock_find_connected_socket(&src, &dst);
+	sk = vsock_find_connected_socket(&src, &dst, vsock_global_dummy_net(),
+					 VSOCK_NET_MODE_GLOBAL);
 	if (!sk) {
-		sk = vsock_find_bound_socket(&dst);
+		sk = vsock_find_bound_socket(&dst, vsock_global_dummy_net(),
+					     VSOCK_NET_MODE_GLOBAL);
 		if (!sk) {
 			/* We could not find a socket for this specified
 			 * address.  If this packet is a RST, we just drop it.
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index 6e78927a598e..1b2fab73e0d0 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -46,7 +46,7 @@ static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk)
 	return 0;
 }
 
-static bool vsock_loopback_seqpacket_allow(u32 remote_cid);
+static bool vsock_loopback_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid);
 static bool vsock_loopback_msgzerocopy_allow(void)
 {
 	return true;
@@ -106,7 +106,7 @@ static struct virtio_transport loopback_transport = {
 	.send_pkt = vsock_loopback_send_pkt,
 };
 
-static bool vsock_loopback_seqpacket_allow(u32 remote_cid)
+static bool vsock_loopback_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
 {
 	return true;
 }

-- 
2.47.3
Re: [PATCH net-next v6 3/9] vsock: add netns to vsock core
Posted by Stefano Garzarella 5 days, 19 hours ago
On Tue, Sep 16, 2025 at 04:43:47PM -0700, Bobby Eshleman wrote:
>From: Bobby Eshleman <bobbyeshleman@meta.com>
>
>Add netns to logic to vsock core. Additionally, modify transport hook
>prototypes to be used by later transport-specific patches (e.g.,
>*_seqpacket_allow()).
>
>Namespaces are supported primarily by changing socket lookup functions
>(e.g., vsock_find_connected_socket()) to take into account the socket
>namespace and the namespace mode before considering a candidate socket a
>"match".
>
>Introduce a dummy namespace struct, __vsock_global_dummy_net, to be
>used by transports that do not support namespacing. This dummy always
>has mode "global" to preserve previous CID behavior.
>
>This patch also introduces the sysctl /proc/sys/net/vsock/ns_mode that
>accepts the "global" or "local" mode strings.
>
>The transports (besides vhost) are modified to use the global dummy.

Why not `vhost`?

>
>Add netns functionality (initialization, passing to transports, procfs,
>etc...) to the af_vsock socket layer. Later patches that add netns
>support to transports depend on this patch.
>
>Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
>
>---
>Changes in v6:
>- unregister sysctl ops in vsock_exit()
>- af_vsock: clarify description of CID behavior
>- af_vsock: fix buf vs buffer naming, and length checking
>- af_vsock: fix length checking w/ correct ctl_table->maxlen
>
>Changes in v5:
>- vsock_global_net() -> vsock_global_dummy_net()
>- update comments for new uAPI
>- use /proc/sys/net/vsock/ns_mode instead of /proc/net/vsock_ns_mode
>- add prototype changes so patch remains compilable
>---
> drivers/vhost/vsock.c                   |   4 +-
> include/net/af_vsock.h                  |  15 ++-
> net/vmw_vsock/af_vsock.c                | 219 ++++++++++++++++++++++++++++++--
> net/vmw_vsock/hyperv_transport.c        |   2 +-
> net/vmw_vsock/virtio_transport.c        |   6 +-
> net/vmw_vsock/virtio_transport_common.c |   4 +-
> net/vmw_vsock/vmci_transport.c          |   6 +-
> net/vmw_vsock/vsock_loopback.c          |   4 +-
> 8 files changed, 234 insertions(+), 26 deletions(-)
>
>diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>index ae01457ea2cd..34adf0cf9124 100644
>--- a/drivers/vhost/vsock.c
>+++ b/drivers/vhost/vsock.c
>@@ -404,7 +404,7 @@ static bool vhost_transport_msgzerocopy_allow(void)
> 	return true;
> }
>
>-static bool vhost_transport_seqpacket_allow(u32 remote_cid);
>+static bool vhost_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid);
>
> static struct virtio_transport vhost_transport = {
> 	.transport = {
>@@ -460,7 +460,7 @@ static struct virtio_transport vhost_transport = {
> 	.send_pkt = vhost_transport_send_pkt,
> };
>
>-static bool vhost_transport_seqpacket_allow(u32 remote_cid)
>+static bool vhost_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
> {
> 	struct vhost_vsock *vsock;
> 	bool seqpacket_allow = false;
>diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>index 2857e97699de..628e35ae9d00 100644
>--- a/include/net/af_vsock.h
>+++ b/include/net/af_vsock.h
>@@ -145,7 +145,7 @@ struct vsock_transport {
> 				     int flags);
> 	int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
> 				 size_t len);
>-	bool (*seqpacket_allow)(u32 remote_cid);
>+	bool (*seqpacket_allow)(struct vsock_sock *vsk, u32 remote_cid);

Why do we need this change?
Not sure if we should do it in a separate patch.

> 	u32 (*seqpacket_has_data)(struct vsock_sock *vsk);
>
> 	/* Notification. */
>@@ -215,9 +215,12 @@ void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
> void vsock_insert_connected(struct vsock_sock *vsk);
> void vsock_remove_bound(struct vsock_sock *vsk);
> void vsock_remove_connected(struct vsock_sock *vsk);
>-struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
>+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr, struct net *net,
>+				     enum vsock_net_mode orig_net_mode);
> struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
>-					 struct sockaddr_vm *dst);
>+					 struct sockaddr_vm *dst,
>+					 struct net *net,
>+					 enum vsock_net_mode orig_net_mode);
> void vsock_remove_sock(struct vsock_sock *vsk);
> void vsock_for_each_connected_socket(struct vsock_transport *transport,
> 				     void (*fn)(struct sock *sk));
>@@ -259,6 +262,12 @@ static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t)
> 	return t->msgzerocopy_allow && t->msgzerocopy_allow();
> }
>

I'd add a comment here to explain when it should be used.

>+extern struct net __vsock_global_dummy_net;
>+static inline struct net *vsock_global_dummy_net(void)
>+{
>+	return &__vsock_global_dummy_net;
>+}
>+
> static inline enum vsock_net_mode vsock_net_mode(struct net *net)
> {
> 	enum vsock_net_mode ret;
>diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>index 0538948d5fd9..c78aba9cd20e 100644
>--- a/net/vmw_vsock/af_vsock.c
>+++ b/net/vmw_vsock/af_vsock.c
>@@ -83,6 +83,35 @@
>  *   TCP_ESTABLISHED - connected
>  *   TCP_CLOSING - disconnecting
>  *   TCP_LISTEN - listening
>+ *
>+ * - Namespaces in vsock support two different modes configured
>+ *   through /proc/sys/net/vsock/ns_mode. The modes are "local" and "global".
>+ *   Each mode defines how the namespace interacts with CIDs.
>+ *   /proc/sys/net/vsock/ns_mode is write-once, so that it may be configured
>+ *   and locked down by a namespace manager. The default is "global". The mode
>+ *   is set per-namespace.
>+ *
>+ *   The modes affect the allocation and accessibility of CIDs as follows:
>+
>+ *   - global - access and allocation are all system-wide
>+ *      - all CID allocation from global namespaces draw from the same
>+ *        system-wide pool
>+ *      - if one global namespace has already allocated some CID, another
>+ *        global namespace will not be able to allocate the same CID
>+ *      - global mode AF_VSOCK sockets can reach any VM or socket in any global
>+ *        namespace, they are not contained to only their own namespace
>+ *      - AF_VSOCK sockets in a global mode namespace cannot reach VMs or
>+ *        sockets in any local mode namespace
>+ *   - local - access and allocation are contained within the namespace
>+ *     - CID allocation draws only from a private pool local only to the
>+ *       namespace, and does not affect the CIDs available for allocation in any
>+ *       other namespace (global or local)
>+ *     - VMs in a local namespace do not collide with CIDs in any other local
>+ *       namespace or any global namespace. For example, if a VM in a local mode
>+ *       namespace is given CID 10, then CID 10 is still available for
>+ *       allocation in any other namespace, but not in the same namespace
>+ *     - AF_VSOCK sockets in a local mode namespace can connect only to VMs or
>+ *       other sockets within their own namespace.
>  */
>
> #include <linux/compat.h>
>@@ -100,6 +129,7 @@
> #include <linux/module.h>
> #include <linux/mutex.h>
> #include <linux/net.h>
>+#include <linux/proc_fs.h>
> #include <linux/poll.h>
> #include <linux/random.h>
> #include <linux/skbuff.h>
>@@ -111,9 +141,14 @@
> #include <linux/workqueue.h>
> #include <net/sock.h>
> #include <net/af_vsock.h>
>+#include <net/netns/vsock.h>
> #include <uapi/linux/vm_sockets.h>
> #include <uapi/asm-generic/ioctls.h>
>
>+#define VSOCK_NET_MODE_STR_GLOBAL "global"
>+#define VSOCK_NET_MODE_STR_LOCAL "local"
>+#define VSOCK_NET_MODE_STR_MAX 8

Why 8 ?

>+
> static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
> static void vsock_sk_destruct(struct sock *sk);
> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
>@@ -149,6 +184,9 @@ static const struct vsock_transport *transport_dgram;
> static const struct vsock_transport *transport_local;
> static DEFINE_MUTEX(vsock_register_mutex);
>
>+struct net __vsock_global_dummy_net;
>+EXPORT_SYMBOL_GPL(__vsock_global_dummy_net);
>+
> /**** UTILS ****/
>
> /* Each bound VSocket is stored in the bind hash table and each connected
>@@ -235,17 +273,21 @@ static void __vsock_remove_connected(struct vsock_sock *vsk)
> 	sock_put(&vsk->sk);
> }
>
>-static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
>+static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr,
>+					      struct net *net,
>+					      enum vsock_net_mode orig_net_mode)
> {
> 	struct vsock_sock *vsk;
>
> 	list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) {
>-		if (vsock_addr_equals_addr(addr, &vsk->local_addr))
>+		if (vsock_addr_equals_addr(addr, &vsk->local_addr) &&
>+		    vsock_net_check_mode(vsk, net, orig_net_mode))
> 			return sk_vsock(vsk);
>
> 		if (addr->svm_port == vsk->local_addr.svm_port &&
> 		    (vsk->local_addr.svm_cid == VMADDR_CID_ANY ||
>-		     addr->svm_cid == VMADDR_CID_ANY))
>+		     addr->svm_cid == VMADDR_CID_ANY) &&
>+		     vsock_net_check_mode(vsk, net, orig_net_mode))
> 			return sk_vsock(vsk);
> 	}
>
>@@ -253,14 +295,17 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
> }
>
> static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
>-						  struct sockaddr_vm *dst)
>+						  struct sockaddr_vm *dst,
>+						  struct net *net,
>+						  enum vsock_net_mode orig_net_mode)
> {
> 	struct vsock_sock *vsk;
>
> 	list_for_each_entry(vsk, vsock_connected_sockets(src, dst),
> 			    connected_table) {
> 		if (vsock_addr_equals_addr(src, &vsk->remote_addr) &&
>-		    dst->svm_port == vsk->local_addr.svm_port) {
>+		    dst->svm_port == vsk->local_addr.svm_port &&
>+		    vsock_net_check_mode(vsk, net, orig_net_mode)) {
> 			return sk_vsock(vsk);
> 		}
> 	}
>@@ -304,12 +349,13 @@ void vsock_remove_connected(struct vsock_sock *vsk)
> }
> EXPORT_SYMBOL_GPL(vsock_remove_connected);
>
>-struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
>+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr, struct net *net,
>+				     enum vsock_net_mode orig_net_mode)
> {
> 	struct sock *sk;
>
> 	spin_lock_bh(&vsock_table_lock);
>-	sk = __vsock_find_bound_socket(addr);
>+	sk = __vsock_find_bound_socket(addr, net, orig_net_mode);
> 	if (sk)
> 		sock_hold(sk);
>
>@@ -320,12 +366,14 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
> EXPORT_SYMBOL_GPL(vsock_find_bound_socket);
>
> struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
>-					 struct sockaddr_vm *dst)
>+					 struct sockaddr_vm *dst,
>+					 struct net *net,
>+					 enum vsock_net_mode orig_net_mode)
> {
> 	struct sock *sk;
>
> 	spin_lock_bh(&vsock_table_lock);
>-	sk = __vsock_find_connected_socket(src, dst);
>+	sk = __vsock_find_connected_socket(src, dst, net, orig_net_mode);
> 	if (sk)
> 		sock_hold(sk);
>
>@@ -528,7 +576,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
>
> 	if (sk->sk_type == SOCK_SEQPACKET) {
> 		if (!new_transport->seqpacket_allow ||
>-		    !new_transport->seqpacket_allow(remote_cid)) {
>+		    !new_transport->seqpacket_allow(vsk, remote_cid)) {
> 			module_put(new_transport->module);
> 			return -ESOCKTNOSUPPORT;
> 		}
>@@ -676,6 +724,7 @@ static void vsock_pending_work(struct work_struct *work)
> static int __vsock_bind_connectible(struct vsock_sock *vsk,
> 				    struct sockaddr_vm *addr)
> {
>+	struct net *net = sock_net(sk_vsock(vsk));
> 	static u32 port;
> 	struct sockaddr_vm new_addr;
>
>@@ -695,7 +744,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
>
> 			new_addr.svm_port = port++;
>
>-			if (!__vsock_find_bound_socket(&new_addr)) {
>+			if (!__vsock_find_bound_socket(&new_addr, net,
>+						       vsk->orig_net_mode)) {
> 				found = true;
> 				break;
> 			}
>@@ -712,7 +762,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
> 			return -EACCES;
> 		}
>
>-		if (__vsock_find_bound_socket(&new_addr))
>+		if (__vsock_find_bound_socket(&new_addr, net,
>+					      vsk->orig_net_mode))
> 			return -EADDRINUSE;
> 	}
>
>@@ -2552,6 +2603,7 @@ static int vsock_create(struct net *net, struct socket *sock,
> 		return -ENOMEM;
>
> 	vsk = vsock_sk(sk);
>+	vsk->orig_net_mode = vsock_net_mode(net);
>
> 	if (sock->type == SOCK_DGRAM) {
> 		ret = vsock_assign_transport(vsk, NULL);
>@@ -2636,6 +2688,139 @@ static struct miscdevice vsock_device = {
> 	.fops		= &vsock_device_ops,
> };
>
>+static int vsock_net_mode_string(const struct ctl_table *table, int write,
>+				 void *buffer, size_t *lenp, loff_t *ppos)
>+{
>+	char data[VSOCK_NET_MODE_STR_MAX] = {0};
>+	enum vsock_net_mode mode;
>+	struct ctl_table tmp;
>+	struct net *net;
>+	int ret;
>+
>+	if (!table->data || !table->maxlen || !*lenp) {
>+		*lenp = 0;
>+		return 0;
>+	}
>+
>+	net = current->nsproxy->net_ns;
>+	tmp = *table;
>+	tmp.data = data;
>+
>+	if (!write) {
>+		const char *p;
>+
>+		mode = vsock_net_mode(net);
>+

Can we use a switch here?

>+		if (mode == VSOCK_NET_MODE_GLOBAL) {
>+			p = VSOCK_NET_MODE_STR_GLOBAL;
>+		} else if (mode == VSOCK_NET_MODE_LOCAL) {
>+			p = VSOCK_NET_MODE_STR_LOCAL;
>+		} else {
>+			WARN_ONCE(true, "netns has invalid vsock mode");
>+			*lenp = 0;
>+			return 0;
>+		}
>+
>+		strscpy(data, p, sizeof(data));
>+		tmp.maxlen = strlen(p);
>+	}
>+
>+	ret = proc_dostring(&tmp, write, buffer, lenp, ppos);
>+	if (ret)
>+		return ret;
>+
>+	if (write) {
>+		if (*lenp >= sizeof(data))
>+			return -EINVAL;
>+
>+		if (!strncmp(data, VSOCK_NET_MODE_STR_GLOBAL, sizeof(data)))
>+			mode = VSOCK_NET_MODE_GLOBAL;
>+		else if (!strncmp(data, VSOCK_NET_MODE_STR_LOCAL, sizeof(data)))
>+			mode = VSOCK_NET_MODE_LOCAL;
>+		else
>+			return -EINVAL;
>+
>+		if (!vsock_net_write_mode(net, mode))
>+			return -EPERM;
>+	}
>+
>+	return 0;
>+}
>+
>+static struct ctl_table vsock_table[] = {
>+	{
>+		.procname	= "ns_mode",
>+		.data		= &init_net.vsock.mode,
>+		.maxlen		= VSOCK_NET_MODE_STR_MAX,
>+		.mode		= 0644,
>+		.proc_handler	= vsock_net_mode_string
>+	},
>+};
>+
>+static int __net_init vsock_sysctl_register(struct net *net)
>+{
>+	struct ctl_table *table;
>+
>+	if (net_eq(net, &init_net)) {
>+		table = vsock_table;
>+	} else {
>+		table = kmemdup(vsock_table, sizeof(vsock_table), GFP_KERNEL);
>+		if (!table)
>+			goto err_alloc;
>+
>+		table[0].data = &net->vsock.mode;
>+	}
>+
>+	net->vsock.vsock_hdr = register_net_sysctl_sz(net, "net/vsock", table,
>+						      ARRAY_SIZE(vsock_table));
>+	if (!net->vsock.vsock_hdr)
>+		goto err_reg;
>+
>+	return 0;
>+
>+err_reg:
>+	if (!net_eq(net, &init_net))
>+		kfree(table);
>+err_alloc:
>+	return -ENOMEM;
>+}
>+
>+static void vsock_sysctl_unregister(struct net *net)
>+{
>+	const struct ctl_table *table;
>+
>+	table = net->vsock.vsock_hdr->ctl_table_arg;
>+	unregister_net_sysctl_table(net->vsock.vsock_hdr);
>+	if (!net_eq(net, &init_net))
>+		kfree(table);
>+}
>+
>+static void vsock_net_init(struct net *net)
>+{
>+	spin_lock_init(&net->vsock.lock);
>+	net->vsock.mode = VSOCK_NET_MODE_GLOBAL;
>+}
>+
>+static __net_init int vsock_sysctl_init_net(struct net *net)
>+{
>+	vsock_net_init(net);
>+
>+	if (vsock_sysctl_register(net))
>+		return -ENOMEM;
>+
>+	return 0;
>+}
>+
>+static __net_exit void vsock_sysctl_exit_net(struct net *net)
>+{
>+	vsock_sysctl_unregister(net);
>+}
>+
>+static struct pernet_operations vsock_sysctl_ops __net_initdata = {
>+	.init = vsock_sysctl_init_net,
>+	.exit = vsock_sysctl_exit_net,
>+};
>+
> static int __init vsock_init(void)
> {
> 	int err = 0;
>@@ -2663,10 +2848,19 @@ static int __init vsock_init(void)
> 		goto err_unregister_proto;
> 	}
>
>+	if (register_pernet_subsys(&vsock_sysctl_ops)) {
>+		err = -ENOMEM;
>+		goto err_unregister_sock;
>+	}
>+
>+	vsock_net_init(&init_net);
>+	vsock_net_init(vsock_global_dummy_net());
> 	vsock_bpf_build_proto();
>
> 	return 0;
>
>+err_unregister_sock:
>+	sock_unregister(AF_VSOCK);
> err_unregister_proto:
> 	proto_unregister(&vsock_proto);
> err_deregister_misc:
>@@ -2680,6 +2874,7 @@ static void __exit vsock_exit(void)
> 	misc_deregister(&vsock_device);
> 	sock_unregister(AF_VSOCK);
> 	proto_unregister(&vsock_proto);
>+	unregister_pernet_subsys(&vsock_sysctl_ops);
> }
>
> const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk)
>diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
>index 432fcbbd14d4..79bc55eeecb3 100644
>--- a/net/vmw_vsock/hyperv_transport.c
>+++ b/net/vmw_vsock/hyperv_transport.c
>@@ -313,7 +313,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
> 		return;
>
> 	hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
>-	sk = vsock_find_bound_socket(&addr);
>+	sk = vsock_find_bound_socket(&addr, vsock_global_dummy_net());

Instead of using `vsock_global_dummy_net()` in each transport, can we
just provide a new variant of vsock_find_bound_socket() that takes a
`net` param (e.g. vsock_find_bound_socket_net())?

The "old" `vsock_find_bound_socket()` can simply call
vsock_find_bound_socket_net() with the dummy net, so we don't need to
change anything in the transports that don't care about netns, and we
don't need to expose the dummy net to transports.

> 	if (!sk)
> 		return;
>
>diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>index b6569b0ca2bb..4626ba0428ef 100644
>--- a/net/vmw_vsock/virtio_transport.c
>+++ b/net/vmw_vsock/virtio_transport.c
>@@ -536,7 +536,7 @@ static bool virtio_transport_msgzerocopy_allow(void)
> 	return true;
> }
>
>-static bool virtio_transport_seqpacket_allow(u32 remote_cid);
>+static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid);
>
> static struct virtio_transport virtio_transport = {
> 	.transport = {
>@@ -593,7 +593,7 @@ static struct virtio_transport virtio_transport = {
> 	.can_msgzerocopy = virtio_transport_can_msgzerocopy,
> };
>
>-static bool virtio_transport_seqpacket_allow(u32 remote_cid)
>+static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
> {
> 	struct virtio_vsock *vsock;
> 	bool seqpacket_allow;
>@@ -659,6 +659,8 @@ static void virtio_transport_rx_work(struct work_struct *work)
> 			if (payload_len)
> 				virtio_vsock_skb_put(skb, payload_len);
>
>+			virtio_vsock_skb_set_net(skb, vsock_global_dummy_net());
>+			virtio_vsock_skb_set_orig_net_mode(skb, VSOCK_NET_MODE_GLOBAL);
> 			virtio_transport_deliver_tap_pkt(skb);
> 			virtio_transport_recv_pkt(&virtio_transport, skb);
> 		}
>diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>index dcc8a1d5851e..1a9129e33d51 100644
>--- a/net/vmw_vsock/virtio_transport_common.c
>+++ b/net/vmw_vsock/virtio_transport_common.c
>@@ -1606,9 +1606,9 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
> 	/* The socket must be in connected or bound table
> 	 * otherwise send reset back
> 	 */
>-	sk = vsock_find_connected_socket(&src, &dst);
>+	sk = vsock_find_connected_socket(&src, &dst, vsock_global_dummy_net());
> 	if (!sk) {
>-		sk = vsock_find_bound_socket(&dst);
>+		sk = vsock_find_bound_socket(&dst, vsock_global_dummy_net());
> 		if (!sk) {
> 			(void)virtio_transport_reset_no_sock(t, skb);
> 			goto free_pkt;
>diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
>index 7eccd6708d66..aa0cd2efe561 100644
>--- a/net/vmw_vsock/vmci_transport.c
>+++ b/net/vmw_vsock/vmci_transport.c
>@@ -703,9 +703,11 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
> 	vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
> 	vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
>
>-	sk = vsock_find_connected_socket(&src, &dst);
>+	sk = vsock_find_connected_socket(&src, &dst, vsock_global_dummy_net(),
>+					 VSOCK_NET_MODE_GLOBAL);
> 	if (!sk) {
>-		sk = vsock_find_bound_socket(&dst);
>+		sk = vsock_find_bound_socket(&dst, vsock_global_dummy_net(),
>+					     VSOCK_NET_MODE_GLOBAL);

As I mentioned, I'd like to avoid all of these changes and instead provide
new functions that take care of netns, while the "old" ones hide the
dummy netns handling.

Just to be clear, I'd like to avoid changes in transports that don't 
support netns if it's possible.

If it's a mess, I can reconsider it :-)

Thanks,
Stefano

> 		if (!sk) {
> 			/* We could not find a socket for this specified
> 			 * address.  If this packet is a RST, we just drop it.
>diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
>index 6e78927a598e..1b2fab73e0d0 100644
>--- a/net/vmw_vsock/vsock_loopback.c
>+++ b/net/vmw_vsock/vsock_loopback.c
>@@ -46,7 +46,7 @@ static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk)
> 	return 0;
> }
>
>-static bool vsock_loopback_seqpacket_allow(u32 remote_cid);
>+static bool vsock_loopback_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid);
> static bool vsock_loopback_msgzerocopy_allow(void)
> {
> 	return true;
>@@ -106,7 +106,7 @@ static struct virtio_transport loopback_transport = {
> 	.send_pkt = vsock_loopback_send_pkt,
> };
>
>-static bool vsock_loopback_seqpacket_allow(u32 remote_cid)
>+static bool vsock_loopback_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid)
> {
> 	return true;
> }
>
>-- 
>2.47.3
>
Re: [PATCH net-next v6 3/9] vsock: add netns to vsock core
Posted by kernel test robot 2 weeks ago
Hi Bobby,

kernel test robot noticed the following build errors:

[auto build test ERROR on 949ddfb774fe527cebfa3f769804344940f7ed2e]

url:    https://github.com/intel-lab-lkp/linux/commits/Bobby-Eshleman/vsock-a-per-net-vsock-NS-mode-state/20250917-074823
base:   949ddfb774fe527cebfa3f769804344940f7ed2e
patch link:    https://lore.kernel.org/r/20250916-vsock-vmtest-v6-3-064d2eb0c89d%40meta.com
patch subject: [PATCH net-next v6 3/9] vsock: add netns to vsock core
config: i386-allmodconfig (https://download.01.org/0day-ci/archive/20250918/202509180511.5pJaP7gr-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250918/202509180511.5pJaP7gr-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202509180511.5pJaP7gr-lkp@intel.com/

All errors (new ones prefixed by >>):

   net/vmw_vsock/hyperv_transport.c: In function 'hvs_open_connection':
>> net/vmw_vsock/hyperv_transport.c:316:14: error: too few arguments to function 'vsock_find_bound_socket'
     316 |         sk = vsock_find_bound_socket(&addr, vsock_global_dummy_net());
         |              ^~~~~~~~~~~~~~~~~~~~~~~
   In file included from net/vmw_vsock/hyperv_transport.c:15:
   include/net/af_vsock.h:218:14: note: declared here
     218 | struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr, struct net *net,
         |              ^~~~~~~~~~~~~~~~~~~~~~~


vim +/vsock_find_bound_socket +316 net/vmw_vsock/hyperv_transport.c

   294	
   295	static void hvs_open_connection(struct vmbus_channel *chan)
   296	{
   297		guid_t *if_instance, *if_type;
   298		unsigned char conn_from_host;
   299	
   300		struct sockaddr_vm addr;
   301		struct sock *sk, *new = NULL;
   302		struct vsock_sock *vnew = NULL;
   303		struct hvsock *hvs = NULL;
   304		struct hvsock *hvs_new = NULL;
   305		int rcvbuf;
   306		int ret;
   307		int sndbuf;
   308	
   309		if_type = &chan->offermsg.offer.if_type;
   310		if_instance = &chan->offermsg.offer.if_instance;
   311		conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];
   312		if (!is_valid_srv_id(if_type))
   313			return;
   314	
   315		hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
 > 316		sk = vsock_find_bound_socket(&addr, vsock_global_dummy_net());
   317		if (!sk)
   318			return;
   319	
   320		lock_sock(sk);
   321		if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
   322		    (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
   323			goto out;
   324	
   325		if (conn_from_host) {
   326			if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
   327				goto out;
   328	
   329			new = vsock_create_connected(sk);
   330			if (!new)
   331				goto out;
   332	
   333			new->sk_state = TCP_SYN_SENT;
   334			vnew = vsock_sk(new);
   335	
   336			hvs_addr_init(&vnew->local_addr, if_type);
   337	
   338			/* Remote peer is always the host */
   339			vsock_addr_init(&vnew->remote_addr,
   340					VMADDR_CID_HOST, VMADDR_PORT_ANY);
   341			vnew->remote_addr.svm_port = get_port_by_srv_id(if_instance);
   342			ret = vsock_assign_transport(vnew, vsock_sk(sk));
   343			/* Transport assigned (looking at remote_addr) must be the
   344			 * same where we received the request.
   345			 */
   346			if (ret || !hvs_check_transport(vnew)) {
   347				sock_put(new);
   348				goto out;
   349			}
   350			hvs_new = vnew->trans;
   351			hvs_new->chan = chan;
   352		} else {
   353			hvs = vsock_sk(sk)->trans;
   354			hvs->chan = chan;
   355		}
   356	
   357		set_channel_read_mode(chan, HV_CALL_DIRECT);
   358	
   359		/* Use the socket buffer sizes as hints for the VMBUS ring size. For
   360		 * server side sockets, 'sk' is the parent socket and thus, this will
   361		 * allow the child sockets to inherit the size from the parent. Keep
   362		 * the mins to the default value and align to page size as per VMBUS
   363		 * requirements.
   364		 * For the max, the socket core library will limit the socket buffer
   365		 * size that can be set by the user, but, since currently, the hv_sock
   366		 * VMBUS ring buffer is physically contiguous allocation, restrict it
   367		 * further.
   368		 * Older versions of hv_sock host side code cannot handle bigger VMBUS
   369		 * ring buffer size. Use the version number to limit the change to newer
   370		 * versions.
   371		 */
   372		if (vmbus_proto_version < VERSION_WIN10_V5) {
   373			sndbuf = RINGBUFFER_HVS_SND_SIZE;
   374			rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
   375		} else {
   376			sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
   377			sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
   378			sndbuf = ALIGN(sndbuf, HV_HYP_PAGE_SIZE);
   379			rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
   380			rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
   381			rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE);
   382		}
   383	
   384		chan->max_pkt_size = HVS_MAX_PKT_SIZE;
   385	
   386		ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
   387				 conn_from_host ? new : sk);
   388		if (ret != 0) {
   389			if (conn_from_host) {
   390				hvs_new->chan = NULL;
   391				sock_put(new);
   392			} else {
   393				hvs->chan = NULL;
   394			}
   395			goto out;
   396		}
   397	
   398		set_per_channel_state(chan, conn_from_host ? new : sk);
   399	
   400		/* This reference will be dropped by hvs_close_connection(). */
   401		sock_hold(conn_from_host ? new : sk);
   402		vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
   403	
   404		/* Set the pending send size to max packet size to always get
   405		 * notifications from the host when there is enough writable space.
   406		 * The host is optimized to send notifications only when the pending
   407		 * size boundary is crossed, and not always.
   408		 */
   409		hvs_set_channel_pending_send_size(chan);
   410	
   411		if (conn_from_host) {
   412			new->sk_state = TCP_ESTABLISHED;
   413			sk_acceptq_added(sk);
   414	
   415			hvs_new->vm_srv_id = *if_type;
   416			hvs_new->host_srv_id = *if_instance;
   417	
   418			vsock_insert_connected(vnew);
   419	
   420			vsock_enqueue_accept(sk, new);
   421		} else {
   422			sk->sk_state = TCP_ESTABLISHED;
   423			sk->sk_socket->state = SS_CONNECTED;
   424	
   425			vsock_insert_connected(vsock_sk(sk));
   426		}
   427	
   428		sk->sk_state_change(sk);
   429	
   430	out:
   431		/* Release refcnt obtained when we called vsock_find_bound_socket() */
   432		sock_put(sk);
   433	
   434		release_sock(sk);
   435	}
   436	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki