vsock_find_cid() and vsock_dev_do_ioctl() may race with module unload.
transport_{g2h,h2g} may become NULL after the NULL check.
Introduce vsock_transport_local_cid() to protect from a potential
null-ptr-deref.
KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f]
RIP: 0010:vsock_find_cid+0x47/0x90
Call Trace:
__vsock_bind+0x4b2/0x720
vsock_bind+0x90/0xe0
__sys_bind+0x14d/0x1e0
__x64_sys_bind+0x6e/0xc0
do_syscall_64+0x92/0x1c0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f]
RIP: 0010:vsock_dev_do_ioctl.isra.0+0x58/0xf0
Call Trace:
__x64_sys_ioctl+0x12d/0x190
do_syscall_64+0x92/0x1c0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")
Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Michal Luczaj <mhal@rbox.co>
---
net/vmw_vsock/af_vsock.c | 23 +++++++++++++++++------
1 file changed, 17 insertions(+), 6 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 2e7a3034e965db30b6ee295370d866e6d8b1c341..63a920af5bfe6960306a3e5eeae0cbf30648985e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -531,9 +531,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
}
EXPORT_SYMBOL_GPL(vsock_assign_transport);
+static u32 vsock_transport_local_cid(const struct vsock_transport **transport)
+{
+ u32 cid = VMADDR_CID_ANY;
+
+ mutex_lock(&vsock_register_mutex);
+ if (*transport)
+ cid = (*transport)->get_local_cid();
+ mutex_unlock(&vsock_register_mutex);
+
+ return cid;
+}
+
bool vsock_find_cid(unsigned int cid)
{
- if (transport_g2h && cid == transport_g2h->get_local_cid())
+ if (cid == vsock_transport_local_cid(&transport_g2h))
return true;
if (transport_h2g && cid == VMADDR_CID_HOST)
@@ -2536,18 +2548,17 @@ static long vsock_dev_do_ioctl(struct file *filp,
unsigned int cmd, void __user *ptr)
{
u32 __user *p = ptr;
- u32 cid = VMADDR_CID_ANY;
int retval = 0;
+ u32 cid;
switch (cmd) {
case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
/* To be compatible with the VMCI behavior, we prioritize the
* guest CID instead of well-know host CID (VMADDR_CID_HOST).
*/
- if (transport_g2h)
- cid = transport_g2h->get_local_cid();
- else if (transport_h2g)
- cid = transport_h2g->get_local_cid();
+ cid = vsock_transport_local_cid(&transport_g2h);
+ if (cid == VMADDR_CID_ANY)
+ cid = vsock_transport_local_cid(&transport_h2g);
if (put_user(cid, p) != 0)
retval = -EFAULT;
--
2.49.0
On Fri, Jun 20, 2025 at 09:52:43PM +0200, Michal Luczaj wrote: >vsock_find_cid() and vsock_dev_do_ioctl() may race with module unload. >transport_{g2h,h2g} may become NULL after the NULL check. > >Introduce vsock_transport_local_cid() to protect from a potential >null-ptr-deref. > >KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >RIP: 0010:vsock_find_cid+0x47/0x90 >Call Trace: > __vsock_bind+0x4b2/0x720 > vsock_bind+0x90/0xe0 > __sys_bind+0x14d/0x1e0 > __x64_sys_bind+0x6e/0xc0 > do_syscall_64+0x92/0x1c0 > entry_SYSCALL_64_after_hwframe+0x4b/0x53 > >KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >RIP: 0010:vsock_dev_do_ioctl.isra.0+0x58/0xf0 >Call Trace: > __x64_sys_ioctl+0x12d/0x190 > do_syscall_64+0x92/0x1c0 > entry_SYSCALL_64_after_hwframe+0x4b/0x53 > >Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") >Suggested-by: Stefano Garzarella <sgarzare@redhat.com> >Signed-off-by: Michal Luczaj <mhal@rbox.co> >--- > net/vmw_vsock/af_vsock.c | 23 +++++++++++++++++------ > 1 file changed, 17 insertions(+), 6 deletions(-) > >diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >index 2e7a3034e965db30b6ee295370d866e6d8b1c341..63a920af5bfe6960306a3e5eeae0cbf30648985e 100644 >--- a/net/vmw_vsock/af_vsock.c >+++ b/net/vmw_vsock/af_vsock.c >@@ -531,9 +531,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) > } > EXPORT_SYMBOL_GPL(vsock_assign_transport); > >+static u32 vsock_transport_local_cid(const struct vsock_transport **transport) Why we need double pointer? 
>+{ >+ u32 cid = VMADDR_CID_ANY; >+ >+ mutex_lock(&vsock_register_mutex); >+ if (*transport) >+ cid = (*transport)->get_local_cid(); >+ mutex_unlock(&vsock_register_mutex); >+ >+ return cid; >+} >+ > bool vsock_find_cid(unsigned int cid) > { >- if (transport_g2h && cid == transport_g2h->get_local_cid()) >+ if (cid == vsock_transport_local_cid(&transport_g2h)) > return true; > > if (transport_h2g && cid == VMADDR_CID_HOST) >@@ -2536,18 +2548,17 @@ static long vsock_dev_do_ioctl(struct file *filp, > unsigned int cmd, void __user *ptr) > { > u32 __user *p = ptr; >- u32 cid = VMADDR_CID_ANY; > int retval = 0; >+ u32 cid; > > switch (cmd) { > case IOCTL_VM_SOCKETS_GET_LOCAL_CID: > /* To be compatible with the VMCI behavior, we prioritize the > * guest CID instead of well-know host CID (VMADDR_CID_HOST). > */ >- if (transport_g2h) >- cid = transport_g2h->get_local_cid(); >- else if (transport_h2g) >- cid = transport_h2g->get_local_cid(); >+ cid = vsock_transport_local_cid(&transport_g2h); >+ if (cid == VMADDR_CID_ANY) >+ cid = vsock_transport_local_cid(&transport_h2g); I still prefer the old `if ... else if ...`, what is the reason of this change? I may miss the point. But overall LGTM! Thanks, Stefano > > if (put_user(cid, p) != 0) > retval = -EFAULT; > >-- >2.49.0 >
On 6/25/25 10:43, Stefano Garzarella wrote: > On Fri, Jun 20, 2025 at 09:52:43PM +0200, Michal Luczaj wrote: >> vsock_find_cid() and vsock_dev_do_ioctl() may race with module unload. >> transport_{g2h,h2g} may become NULL after the NULL check. >> >> Introduce vsock_transport_local_cid() to protect from a potential >> null-ptr-deref. >> >> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >> RIP: 0010:vsock_find_cid+0x47/0x90 >> Call Trace: >> __vsock_bind+0x4b2/0x720 >> vsock_bind+0x90/0xe0 >> __sys_bind+0x14d/0x1e0 >> __x64_sys_bind+0x6e/0xc0 >> do_syscall_64+0x92/0x1c0 >> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >> >> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >> RIP: 0010:vsock_dev_do_ioctl.isra.0+0x58/0xf0 >> Call Trace: >> __x64_sys_ioctl+0x12d/0x190 >> do_syscall_64+0x92/0x1c0 >> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >> >> Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") >> Suggested-by: Stefano Garzarella <sgarzare@redhat.com> >> Signed-off-by: Michal Luczaj <mhal@rbox.co> >> --- >> net/vmw_vsock/af_vsock.c | 23 +++++++++++++++++------ >> 1 file changed, 17 insertions(+), 6 deletions(-) >> >> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >> index 2e7a3034e965db30b6ee295370d866e6d8b1c341..63a920af5bfe6960306a3e5eeae0cbf30648985e 100644 >> --- a/net/vmw_vsock/af_vsock.c >> +++ b/net/vmw_vsock/af_vsock.c >> @@ -531,9 +531,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) >> } >> EXPORT_SYMBOL_GPL(vsock_assign_transport); >> >> +static u32 vsock_transport_local_cid(const struct vsock_transport **transport) > > Why we need double pointer? Because of a possible race. If @transport is `struct vsock_transport*` and we pass `transport_g2h`, the passed non-NULL pointer value may immediately become stale (due to module unload). 
But if it's `vsock_transport**` and we pass `&transport_g2h`, then we can take the mutex, check `*transport` for NULL and safely go ahead. Or are you saying this could be simplified? >> +{ >> + u32 cid = VMADDR_CID_ANY; >> + >> + mutex_lock(&vsock_register_mutex); >> + if (*transport) >> + cid = (*transport)->get_local_cid(); >> + mutex_unlock(&vsock_register_mutex); >> + >> + return cid; >> +} >> + >> bool vsock_find_cid(unsigned int cid) >> { >> - if (transport_g2h && cid == transport_g2h->get_local_cid()) >> + if (cid == vsock_transport_local_cid(&transport_g2h)) >> return true; >> >> if (transport_h2g && cid == VMADDR_CID_HOST) >> @@ -2536,18 +2548,17 @@ static long vsock_dev_do_ioctl(struct file *filp, >> unsigned int cmd, void __user *ptr) >> { >> u32 __user *p = ptr; >> - u32 cid = VMADDR_CID_ANY; >> int retval = 0; >> + u32 cid; >> >> switch (cmd) { >> case IOCTL_VM_SOCKETS_GET_LOCAL_CID: >> /* To be compatible with the VMCI behavior, we prioritize the >> * guest CID instead of well-know host CID (VMADDR_CID_HOST). >> */ >> - if (transport_g2h) >> - cid = transport_g2h->get_local_cid(); >> - else if (transport_h2g) >> - cid = transport_h2g->get_local_cid(); >> + cid = vsock_transport_local_cid(&transport_g2h); >> + if (cid == VMADDR_CID_ANY) >> + cid = vsock_transport_local_cid(&transport_h2g); > > I still prefer the old `if ... else if ...`, what is the reason of this > change? I may miss the point. Ah, ok, I've just thought such cascade would be cleaner. So is this what you prefer? if (transport_g2h) cid = vsock_transport_local_cid(&transport_g2h); else if (transport_h2g) cid = vsock_transport_local_cid(&transport_h2g); Thanks, Michal
On Wed, Jun 25, 2025 at 11:23:30PM +0200, Michal Luczaj wrote: >On 6/25/25 10:43, Stefano Garzarella wrote: >> On Fri, Jun 20, 2025 at 09:52:43PM +0200, Michal Luczaj wrote: >>> vsock_find_cid() and vsock_dev_do_ioctl() may race with module unload. >>> transport_{g2h,h2g} may become NULL after the NULL check. >>> >>> Introduce vsock_transport_local_cid() to protect from a potential >>> null-ptr-deref. >>> >>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >>> RIP: 0010:vsock_find_cid+0x47/0x90 >>> Call Trace: >>> __vsock_bind+0x4b2/0x720 >>> vsock_bind+0x90/0xe0 >>> __sys_bind+0x14d/0x1e0 >>> __x64_sys_bind+0x6e/0xc0 >>> do_syscall_64+0x92/0x1c0 >>> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >>> >>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >>> RIP: 0010:vsock_dev_do_ioctl.isra.0+0x58/0xf0 >>> Call Trace: >>> __x64_sys_ioctl+0x12d/0x190 >>> do_syscall_64+0x92/0x1c0 >>> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >>> >>> Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") >>> Suggested-by: Stefano Garzarella <sgarzare@redhat.com> >>> Signed-off-by: Michal Luczaj <mhal@rbox.co> >>> --- >>> net/vmw_vsock/af_vsock.c | 23 +++++++++++++++++------ >>> 1 file changed, 17 insertions(+), 6 deletions(-) >>> >>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >>> index 2e7a3034e965db30b6ee295370d866e6d8b1c341..63a920af5bfe6960306a3e5eeae0cbf30648985e 100644 >>> --- a/net/vmw_vsock/af_vsock.c >>> +++ b/net/vmw_vsock/af_vsock.c >>> @@ -531,9 +531,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) >>> } >>> EXPORT_SYMBOL_GPL(vsock_assign_transport); >>> >>> +static u32 vsock_transport_local_cid(const struct vsock_transport **transport) >> >> Why we need double pointer? > >Because of a possible race. If @transport is `struct vsock_transport*` and >we pass `transport_g2h`, the passed non-NULL pointer value may immediately >become stale (due to module unload). 
But if it's `vsock_transport**` and we >pass `&transport_g2h`, then we can take the mutex, check `*transport` for >NULL and safely go ahead. > >Or are you saying this could be simplified? Nope, you're right! I was still thinking about my old version where we had the switch inside... BTW I'd like to change the name, `vsock_transport_local` prefix is confusing IMO, since it seems related only to the `transport_local`. Another thing I'm worried about is that we'll then start using it on `vsk->transport` when this is only to be used on registered transports (i.e. `static ...`), though, I don't think there's a way to force type checking from the compiler (unless you wrap it in a struct). It's not a big issue, but taking the mutex is useless in those cases. So, if we can't do much, I'd add a comment and make the function name more clear. e.g. vsock_registered_transport_cid() ? or something similar. > >>> +{ >>> + u32 cid = VMADDR_CID_ANY; >>> + >>> + mutex_lock(&vsock_register_mutex); >>> + if (*transport) >>> + cid = (*transport)->get_local_cid(); >>> + mutex_unlock(&vsock_register_mutex); >>> + >>> + return cid; >>> +} >>> + >>> bool vsock_find_cid(unsigned int cid) >>> { >>> - if (transport_g2h && cid == transport_g2h->get_local_cid()) >>> + if (cid == vsock_transport_local_cid(&transport_g2h)) >>> return true; >>> >>> if (transport_h2g && cid == VMADDR_CID_HOST) >>> @@ -2536,18 +2548,17 @@ static long vsock_dev_do_ioctl(struct file *filp, >>> unsigned int cmd, void __user *ptr) >>> { >>> u32 __user *p = ptr; >>> - u32 cid = VMADDR_CID_ANY; >>> int retval = 0; >>> + u32 cid; >>> >>> switch (cmd) { >>> case IOCTL_VM_SOCKETS_GET_LOCAL_CID: >>> /* To be compatible with the VMCI behavior, we prioritize the >>> * guest CID instead of well-know host CID (VMADDR_CID_HOST). 
>>> */ >>> - if (transport_g2h) >>> - cid = transport_g2h->get_local_cid(); >>> - else if (transport_h2g) >>> - cid = transport_h2g->get_local_cid(); >>> + cid = vsock_transport_local_cid(&transport_g2h); >>> + if (cid == VMADDR_CID_ANY) >>> + cid = vsock_transport_local_cid(&transport_h2g); >> >> I still prefer the old `if ... else if ...`, what is the reason of this >> change? I may miss the point. > >Ah, ok, I've just thought such cascade would be cleaner. > >So is this what you prefer? I usually prefer as few changes as possible, but in this case I see your point, so up to you ;-) Your way saves the `cid` initialization and an `if`, so it's fine. Thanks, Stefano > >if (transport_g2h) > cid = vsock_transport_local_cid(&transport_g2h); >else if (transport_h2g) > cid = vsock_transport_local_cid(&transport_h2g); > >Thanks, >Michal >
On 6/27/25 10:02, Stefano Garzarella wrote: > On Wed, Jun 25, 2025 at 11:23:30PM +0200, Michal Luczaj wrote: >> On 6/25/25 10:43, Stefano Garzarella wrote: >>> On Fri, Jun 20, 2025 at 09:52:43PM +0200, Michal Luczaj wrote: >>>> vsock_find_cid() and vsock_dev_do_ioctl() may race with module unload. >>>> transport_{g2h,h2g} may become NULL after the NULL check. >>>> >>>> Introduce vsock_transport_local_cid() to protect from a potential >>>> null-ptr-deref. >>>> >>>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >>>> RIP: 0010:vsock_find_cid+0x47/0x90 >>>> Call Trace: >>>> __vsock_bind+0x4b2/0x720 >>>> vsock_bind+0x90/0xe0 >>>> __sys_bind+0x14d/0x1e0 >>>> __x64_sys_bind+0x6e/0xc0 >>>> do_syscall_64+0x92/0x1c0 >>>> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >>>> >>>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >>>> RIP: 0010:vsock_dev_do_ioctl.isra.0+0x58/0xf0 >>>> Call Trace: >>>> __x64_sys_ioctl+0x12d/0x190 >>>> do_syscall_64+0x92/0x1c0 >>>> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >>>> >>>> Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") >>>> Suggested-by: Stefano Garzarella <sgarzare@redhat.com> >>>> Signed-off-by: Michal Luczaj <mhal@rbox.co> >>>> --- >>>> net/vmw_vsock/af_vsock.c | 23 +++++++++++++++++------ >>>> 1 file changed, 17 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >>>> index 2e7a3034e965db30b6ee295370d866e6d8b1c341..63a920af5bfe6960306a3e5eeae0cbf30648985e 100644 >>>> --- a/net/vmw_vsock/af_vsock.c >>>> +++ b/net/vmw_vsock/af_vsock.c >>>> @@ -531,9 +531,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) >>>> } >>>> EXPORT_SYMBOL_GPL(vsock_assign_transport); >>>> >>>> +static u32 vsock_transport_local_cid(const struct vsock_transport **transport) >>> >>> Why we need double pointer? >> >> Because of a possible race. 
If @transport is `struct vsock_transport*` and >> we pass `transport_g2h`, the passed non-NULL pointer value may immediately >> become stale (due to module unload). But if it's `vsock_transport**` and we >> pass `&transport_g2h`, then we can take the mutex, check `*transport` for >> NULL and safely go ahead. >> >> Or are you saying this could be simplified? > > Nope, you're right! I was still thinking about my old version where we > had the switch inside... > > BTW I'd like to change the name, `vsock_transport_local` prefix is > confusing IMO, since it seems related only to the `transport_local`. > > Another thing I'm worried about is that we'll then start using it on > `vsk->transport` when this is only to be used on registered transports > (i.e. `static ...`), though, I don't think there's a way to force type > checking from the compiler (unless you wrap it in a struct). (...) I've found (on SO[1]) this somewhat hackish compile-time `static`-checking: static u32 __vsock_registered_transport_cid(const struct vsock_transport **transport) { u32 cid = VMADDR_CID_ANY; mutex_lock(&vsock_register_mutex); if (*transport) cid = (*transport)->get_local_cid(); mutex_unlock(&vsock_register_mutex); return cid; } #define ASSERT_REGISTERED_TRANSPORT(t) \ __always_unused static void *__UNIQUE_ID(vsock) = (t) #define vsock_registered_transport_cid(transport) \ ({ \ ASSERT_REGISTERED_TRANSPORT(transport); \ __vsock_registered_transport_cid(transport); \ }) It does the trick, compilation fails on vsock_registered_transport_cid(&vsk->transport): net/vmw_vsock/af_vsock.c: In function ‘vsock_send_shutdown’: net/vmw_vsock/af_vsock.c:565:59: error: initializer element is not constant 565 | __always_unused static void *__UNIQUE_ID(vsock) = (t) | ^ net/vmw_vsock/af_vsock.c:569:9: note: in expansion of macro ‘ASSERT_REGISTERED_TRANSPORT’ 569 | ASSERT_REGISTERED_TRANSPORT(transport); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ net/vmw_vsock/af_vsock.c:626:9: note: in expansion of macro 
‘vsock_registered_transport_cid’ 626 | vsock_registered_transport_cid(&vsk->transport); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ But perhaps adding a comment wouldn't hurt either, e.g. /* Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks. * Otherwise we may race with module removal. Do not use on * `vsk->transport`. */ ? ...which begs another question: do we stick to the netdev special comment style? See commit 82b8000c28b5 ("net: drop special comment style"). Oh, and come to think of it, we don't really need that (easily contended?) mutex here. Same can be done with RCU. Which should speed up vsock_bind() -> __vsock_bind() -> vsock_find_cid(), right? This is what I mean, roughly: +static u32 vsock_registered_transport_cid(const struct vsock_transport __rcu **trans_ptr) +{ + const struct vsock_transport *transport; + u32 cid = VMADDR_CID_ANY; + + rcu_read_lock(); + transport = rcu_dereference(*trans_ptr); + if (transport) + cid = transport->get_local_cid(); + rcu_read_unlock(); + + return cid; +} ... @@ -2713,6 +2726,7 @@ void vsock_core_unregister(const struct vsock_transport *t) transport_local = NULL; mutex_unlock(&vsock_register_mutex); + synchronize_rcu(); } I've realized I'm throwing multiple unrelated ideas/questions, so let me summarise: 1. Hackish macro can be used to guard against calling vsock_registered_transport_cid() on a non-static variable. 2. We can comment the function to add some context and avoid confusion. 3. Instead of taking mutex in vsock_registered_transport_cid() we can use RCU. > So, if we can't do much, I'd add a comment and make the function name > more clear. e.g. vsock_registered_transport_cid() ? or something > similar. Sure, will do. Thanks! [1]: https://stackoverflow.com/questions/5645695/how-can-i-add-a-static-assert-to-check-if-a-variable-is-static/5672637#5672637
On Sun, Jun 29, 2025 at 11:26:12PM +0200, Michal Luczaj wrote: >On 6/27/25 10:02, Stefano Garzarella wrote: >> On Wed, Jun 25, 2025 at 11:23:30PM +0200, Michal Luczaj wrote: >>> On 6/25/25 10:43, Stefano Garzarella wrote: >>>> On Fri, Jun 20, 2025 at 09:52:43PM +0200, Michal Luczaj wrote: >>>>> vsock_find_cid() and vsock_dev_do_ioctl() may race with module unload. >>>>> transport_{g2h,h2g} may become NULL after the NULL check. >>>>> >>>>> Introduce vsock_transport_local_cid() to protect from a potential >>>>> null-ptr-deref. >>>>> >>>>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >>>>> RIP: 0010:vsock_find_cid+0x47/0x90 >>>>> Call Trace: >>>>> __vsock_bind+0x4b2/0x720 >>>>> vsock_bind+0x90/0xe0 >>>>> __sys_bind+0x14d/0x1e0 >>>>> __x64_sys_bind+0x6e/0xc0 >>>>> do_syscall_64+0x92/0x1c0 >>>>> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >>>>> >>>>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >>>>> RIP: 0010:vsock_dev_do_ioctl.isra.0+0x58/0xf0 >>>>> Call Trace: >>>>> __x64_sys_ioctl+0x12d/0x190 >>>>> do_syscall_64+0x92/0x1c0 >>>>> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >>>>> >>>>> Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") >>>>> Suggested-by: Stefano Garzarella <sgarzare@redhat.com> >>>>> Signed-off-by: Michal Luczaj <mhal@rbox.co> >>>>> --- >>>>> net/vmw_vsock/af_vsock.c | 23 +++++++++++++++++------ >>>>> 1 file changed, 17 insertions(+), 6 deletions(-) >>>>> >>>>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >>>>> index 2e7a3034e965db30b6ee295370d866e6d8b1c341..63a920af5bfe6960306a3e5eeae0cbf30648985e 100644 >>>>> --- a/net/vmw_vsock/af_vsock.c >>>>> +++ b/net/vmw_vsock/af_vsock.c >>>>> @@ -531,9 +531,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) >>>>> } >>>>> EXPORT_SYMBOL_GPL(vsock_assign_transport); >>>>> >>>>> +static u32 vsock_transport_local_cid(const struct vsock_transport **transport) >>>> >>>> Why we need double pointer? 
>>> >>> Because of a possible race. If @transport is `struct vsock_transport*` and >>> we pass `transport_g2h`, the passed non-NULL pointer value may immediately >>> become stale (due to module unload). But if it's `vsock_transport**` and we >>> pass `&transport_g2h`, then we can take the mutex, check `*transport` for >>> NULL and safely go ahead. >>> >>> Or are you saying this could be simplified? >> >> Nope, you're right! I was still thinking about my old version where we >> had the switch inside... >> >> BTW I'd like to change the name, `vsock_transport_local` prefix is >> confusing IMO, since it seems related only to the `transport_local`. >> >> Another thing I'm worried about is that we'll then start using it on >> `vsk->transport` when this is only to be used on registered transports >> (i.e. `static ...`), though, I don't think there's a way to force type >> checking from the compiler (unless you wrap it in a struct). (...) > >I've found (on SO[1]) this somewhat hackish compile-time `static`-checking: > >static u32 __vsock_registered_transport_cid(const struct vsock_transport >**transport) >{ > u32 cid = VMADDR_CID_ANY; > > mutex_lock(&vsock_register_mutex); > if (*transport) > cid = (*transport)->get_local_cid(); > mutex_unlock(&vsock_register_mutex); > > return cid; >} > >#define ASSERT_REGISTERED_TRANSPORT(t) \ > __always_unused static void *__UNIQUE_ID(vsock) = (t) > >#define vsock_registered_transport_cid(transport) \ >({ \ > ASSERT_REGISTERED_TRANSPORT(transport); \ > __vsock_registered_transport_cid(transport); \ >}) > >It does the trick, compilation fails on >vsock_registered_transport_cid(&vsk->transport): > >net/vmw_vsock/af_vsock.c: In function ‘vsock_send_shutdown’: >net/vmw_vsock/af_vsock.c:565:59: error: initializer element is not constant > 565 | __always_unused static void *__UNIQUE_ID(vsock) = (t) > | ^ >net/vmw_vsock/af_vsock.c:569:9: note: in expansion of macro >‘ASSERT_REGISTERED_TRANSPORT’ > 569 | ASSERT_REGISTERED_TRANSPORT(transport); 
> \ > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ >net/vmw_vsock/af_vsock.c:626:9: note: in expansion of macro >‘vsock_registered_transport_cid’ > 626 | vsock_registered_transport_cid(&vsk->transport); > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > >But perhaps adding a comment wouldn't hurt either, e.g. > >/* Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks. > * Otherwise we may race with module removal. Do not use on > * `vsk->transport`. > */ Yeah, I'd just go with the comment, without introducing complex macros. Also because in the worst case we don't do anything wrong. BTW if we have some macros already defined in the kernel that we can re-use, it's fine. > >? ...which begs another question: do we stick to the netdev special comment >style? See commit 82b8000c28b5 ("net: drop special comment style"). If checkpatch is fine, I'm fine :-) > >Oh, and come to think of it, we don't really need that (easily contended?) >mutex here. Same can be done with RCU. Which should speed up vsock_bind() >-> __vsock_bind() -> vsock_find_cid(), right? This is what I mean, roughly: > >+static u32 vsock_registered_transport_cid(const struct vsock_transport >__rcu **trans_ptr) >+{ >+ const struct vsock_transport *transport; >+ u32 cid = VMADDR_CID_ANY; >+ >+ rcu_read_lock(); >+ transport = rcu_dereference(*trans_ptr); >+ if (transport) >+ cid = transport->get_local_cid(); >+ rcu_read_unlock(); >+ >+ return cid; >+} >... >@@ -2713,6 +2726,7 @@ void vsock_core_unregister(const struct >vsock_transport *t) > transport_local = NULL; > > mutex_unlock(&vsock_register_mutex); >+ synchronize_rcu(); > } > >I've realized I'm throwing multiple unrelated ideas/questions, so let me >summarise: >1. Hackish macro can be used to guard against calling >vsock_registered_transport_cid() on a non-static variable. >2. We can comment the function to add some context and avoid confusion. I'd go with 2. >3. Instead of taking mutex in vsock_registered_transport_cid() we can use RCU. 
Since the vsock_bind() is not in the hot path, maybe a mutex is fine. WDYT? Thanks, Stefano > >> So, if we can't do much, I'd add a comment and make the function name >> more clear. e.g. vsock_registered_transport_cid() ? or something >> similar. > >Sure, will do. > >Thanks! > >[1]: >https://stackoverflow.com/questions/5645695/how-can-i-add-a-static-assert-to-check-if-a-variable-is-static/5672637#5672637 >
On 6/30/25 11:05, Stefano Garzarella wrote: > On Sun, Jun 29, 2025 at 11:26:12PM +0200, Michal Luczaj wrote: >> On 6/27/25 10:02, Stefano Garzarella wrote: >>> On Wed, Jun 25, 2025 at 11:23:30PM +0200, Michal Luczaj wrote: >>>> On 6/25/25 10:43, Stefano Garzarella wrote: >>>>> On Fri, Jun 20, 2025 at 09:52:43PM +0200, Michal Luczaj wrote: >>>>>> vsock_find_cid() and vsock_dev_do_ioctl() may race with module unload. >>>>>> transport_{g2h,h2g} may become NULL after the NULL check. >>>>>> >>>>>> Introduce vsock_transport_local_cid() to protect from a potential >>>>>> null-ptr-deref. >>>>>> >>>>>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >>>>>> RIP: 0010:vsock_find_cid+0x47/0x90 >>>>>> Call Trace: >>>>>> __vsock_bind+0x4b2/0x720 >>>>>> vsock_bind+0x90/0xe0 >>>>>> __sys_bind+0x14d/0x1e0 >>>>>> __x64_sys_bind+0x6e/0xc0 >>>>>> do_syscall_64+0x92/0x1c0 >>>>>> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >>>>>> >>>>>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >>>>>> RIP: 0010:vsock_dev_do_ioctl.isra.0+0x58/0xf0 >>>>>> Call Trace: >>>>>> __x64_sys_ioctl+0x12d/0x190 >>>>>> do_syscall_64+0x92/0x1c0 >>>>>> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >>>>>> >>>>>> Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") >>>>>> Suggested-by: Stefano Garzarella <sgarzare@redhat.com> >>>>>> Signed-off-by: Michal Luczaj <mhal@rbox.co> ... >> Oh, and come to think of it, we don't really need that (easily contended?) >> mutex here. Same can be done with RCU. Which should speed up vsock_bind() >> -> __vsock_bind() -> vsock_find_cid(), right? 
This is what I mean, roughly: >> >> +static u32 vsock_registered_transport_cid(const struct vsock_transport >> __rcu **trans_ptr) >> +{ >> + const struct vsock_transport *transport; >> + u32 cid = VMADDR_CID_ANY; >> + >> + rcu_read_lock(); >> + transport = rcu_dereference(*trans_ptr); >> + if (transport) >> + cid = transport->get_local_cid(); >> + rcu_read_unlock(); >> + >> + return cid; >> +} >> ... >> @@ -2713,6 +2726,7 @@ void vsock_core_unregister(const struct >> vsock_transport *t) >> transport_local = NULL; >> >> mutex_unlock(&vsock_register_mutex); >> + synchronize_rcu(); >> } >> >> I've realized I'm throwing multiple unrelated ideas/questions, so let me >> summarise: >> 1. Hackish macro can be used to guard against calling >> vsock_registered_transport_cid() on a non-static variable. >> 2. We can comment the function to add some context and avoid confusion. > > I'd go with 2. All right, will do. >> 3. Instead of taking mutex in vsock_registered_transport_cid() we can use RCU. > > Since the vsock_bind() is not in the hot path, maybe a mutex is fine. > WDYT? I wrote a benchmark that attempts (and fails due to a non-existing CID) to bind() 100s of vsocks in multiple threads. `perf lock con` shows that this mutex is contended, and things are slowed down by 100+% compared with RCU approach. Which makes sense: every explicit vsock bind() across the whole system would need to acquire the mutex. And now we're also taking the same mutex in vsock_assign_transport(), i.e. during connect(). But maybe such stress testing is just unrealistic, I really don't know.
On Mon, Jun 30, 2025 at 01:02:26PM +0200, Michal Luczaj wrote: >On 6/30/25 11:05, Stefano Garzarella wrote: >> On Sun, Jun 29, 2025 at 11:26:12PM +0200, Michal Luczaj wrote: >>> On 6/27/25 10:02, Stefano Garzarella wrote: >>>> On Wed, Jun 25, 2025 at 11:23:30PM +0200, Michal Luczaj wrote: >>>>> On 6/25/25 10:43, Stefano Garzarella wrote: >>>>>> On Fri, Jun 20, 2025 at 09:52:43PM +0200, Michal Luczaj wrote: >>>>>>> vsock_find_cid() and vsock_dev_do_ioctl() may race with module unload. >>>>>>> transport_{g2h,h2g} may become NULL after the NULL check. >>>>>>> >>>>>>> Introduce vsock_transport_local_cid() to protect from a potential >>>>>>> null-ptr-deref. >>>>>>> >>>>>>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >>>>>>> RIP: 0010:vsock_find_cid+0x47/0x90 >>>>>>> Call Trace: >>>>>>> __vsock_bind+0x4b2/0x720 >>>>>>> vsock_bind+0x90/0xe0 >>>>>>> __sys_bind+0x14d/0x1e0 >>>>>>> __x64_sys_bind+0x6e/0xc0 >>>>>>> do_syscall_64+0x92/0x1c0 >>>>>>> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >>>>>>> >>>>>>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f] >>>>>>> RIP: 0010:vsock_dev_do_ioctl.isra.0+0x58/0xf0 >>>>>>> Call Trace: >>>>>>> __x64_sys_ioctl+0x12d/0x190 >>>>>>> do_syscall_64+0x92/0x1c0 >>>>>>> entry_SYSCALL_64_after_hwframe+0x4b/0x53 >>>>>>> >>>>>>> Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") >>>>>>> Suggested-by: Stefano Garzarella <sgarzare@redhat.com> >>>>>>> Signed-off-by: Michal Luczaj <mhal@rbox.co> >... >>> Oh, and come to think of it, we don't really need that (easily contended?) >>> mutex here. Same can be done with RCU. Which should speed up vsock_bind() >>> -> __vsock_bind() -> vsock_find_cid(), right? 
This is what I mean, roughly: >>> >>> +static u32 vsock_registered_transport_cid(const struct vsock_transport >>> __rcu **trans_ptr) >>> +{ >>> + const struct vsock_transport *transport; >>> + u32 cid = VMADDR_CID_ANY; >>> + >>> + rcu_read_lock(); >>> + transport = rcu_dereference(*trans_ptr); >>> + if (transport) >>> + cid = transport->get_local_cid(); >>> + rcu_read_unlock(); >>> + >>> + return cid; >>> +} >>> ... >>> @@ -2713,6 +2726,7 @@ void vsock_core_unregister(const struct >>> vsock_transport *t) >>> transport_local = NULL; >>> >>> mutex_unlock(&vsock_register_mutex); >>> + synchronize_rcu(); >>> } >>> >>> I've realized I'm throwing multiple unrelated ideas/questions, so let me >>> summarise: >>> 1. Hackish macro can be used to guard against calling >>> vsock_registered_transport_cid() on a non-static variable. >>> 2. We can comment the function to add some context and avoid confusion. >> >> I'd go with 2. > >All right, will do. > >>> 3. Instead of taking mutex in vsock_registered_transport_cid() we can use RCU. >> >> Since the vsock_bind() is not in the hot path, maybe a mutex is fine. >> WDYT? > >I wrote a benchmark that attempts (and fails due to a non-existing CID) to >bind() 100s of vsocks in multiple threads. `perf lock con` shows that this >mutex is contended, and things are slowed down by 100+% compared with RCU >approach. Which makes sense: every explicit vsock bind() across the whole >system would need to acquire the mutex. And now we're also taking the same >mutex in vsock_assign_transport(), i.e. during connect(). But maybe such >stress testing is just unrealistic, I really don't know. > I still don't think it's a hot path to optimize, but I'm not totally against it. If you want to do it though, I would say do it in a separate patch. Thanks, Stefano
On 7/1/25 12:34, Stefano Garzarella wrote:
> On Mon, Jun 30, 2025 at 01:02:26PM +0200, Michal Luczaj wrote:
>> On 6/30/25 11:05, Stefano Garzarella wrote:
>>> On Sun, Jun 29, 2025 at 11:26:12PM +0200, Michal Luczaj wrote:
>>>> On 6/27/25 10:02, Stefano Garzarella wrote:
>>>>> On Wed, Jun 25, 2025 at 11:23:30PM +0200, Michal Luczaj wrote:
>>>>>> On 6/25/25 10:43, Stefano Garzarella wrote:
>>>>>>> On Fri, Jun 20, 2025 at 09:52:43PM +0200, Michal Luczaj wrote:
>>>>>>>> vsock_find_cid() and vsock_dev_do_ioctl() may race with module unload.
>>>>>>>> transport_{g2h,h2g} may become NULL after the NULL check.
>>>>>>>>
>>>>>>>> Introduce vsock_transport_local_cid() to protect from a potential
>>>>>>>> null-ptr-deref.
>>>>>>>>
>>>>>>>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f]
>>>>>>>> RIP: 0010:vsock_find_cid+0x47/0x90
>>>>>>>> Call Trace:
>>>>>>>> __vsock_bind+0x4b2/0x720
>>>>>>>> vsock_bind+0x90/0xe0
>>>>>>>> __sys_bind+0x14d/0x1e0
>>>>>>>> __x64_sys_bind+0x6e/0xc0
>>>>>>>> do_syscall_64+0x92/0x1c0
>>>>>>>> entry_SYSCALL_64_after_hwframe+0x4b/0x53
>>>>>>>>
>>>>>>>> KASAN: null-ptr-deref in range [0x0000000000000118-0x000000000000011f]
>>>>>>>> RIP: 0010:vsock_dev_do_ioctl.isra.0+0x58/0xf0
>>>>>>>> Call Trace:
>>>>>>>> __x64_sys_ioctl+0x12d/0x190
>>>>>>>> do_syscall_64+0x92/0x1c0
>>>>>>>> entry_SYSCALL_64_after_hwframe+0x4b/0x53
>>>>>>>>
>>>>>>>> Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")
>>>>>>>> Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
>>>>>>>> Signed-off-by: Michal Luczaj <mhal@rbox.co>
>> ...
>>>> Oh, and come to think of it, we don't really need that (easily contended?)
>>>> mutex here. Same can be done with RCU. Which should speed up vsock_bind()
>>>> -> __vsock_bind() -> vsock_find_cid(), right?
This is what I mean, roughly:
>>>>
>>>> +static u32 vsock_registered_transport_cid(const struct vsock_transport
>>>> __rcu **trans_ptr)
>>>> +{
>>>> +	const struct vsock_transport *transport;
>>>> +	u32 cid = VMADDR_CID_ANY;
>>>> +
>>>> +	rcu_read_lock();
>>>> +	transport = rcu_dereference(*trans_ptr);
>>>> +	if (transport)
>>>> +		cid = transport->get_local_cid();
>>>> +	rcu_read_unlock();
>>>> +
>>>> +	return cid;
>>>> +}
>>>> ...
>>>> @@ -2713,6 +2726,7 @@ void vsock_core_unregister(const struct
>>>> vsock_transport *t)
>>>> 		transport_local = NULL;
>>>>
>>>> 	mutex_unlock(&vsock_register_mutex);
>>>> +	synchronize_rcu();
>>>> }
>>>>
>>>> I've realized I'm throwing multiple unrelated ideas/questions, so let me
>>>> summarise:
>>>> 1. Hackish macro can be used to guard against calling
>>>> vsock_registered_transport_cid() on a non-static variable.
>>>> 2. We can comment the function to add some context and avoid confusion.
>>>
>>> I'd go with 2.
>>
>> All right, will do.
>>
>>>> 3. Instead of taking mutex in vsock_registered_transport_cid() we can use RCU.
>>>
>>> Since the vsock_bind() is not in the hot path, maybe a mutex is fine.
>>> WDYT?
>>
>> I wrote a benchmark that attempts (and fails due to a non-existing CID) to
>> bind() 100s of vsocks in multiple threads. `perf lock con` shows that this
>> mutex is contended, and things are slowed down by 100+% compared with RCU
>> approach. Which makes sense: every explicit vsock bind() across the whole
>> system would need to acquire the mutex. And now we're also taking the same
>> mutex in vsock_assign_transport(), i.e. during connect(). But maybe such
>> stress testing is just unrealistic, I really don't know.
>>
>
> I still don't think it's a hot path to optimize, but I'm not totally
> against it. If you want to do it though, I would say do it in a separate
> patch.

All right, so here's v3:
https://lore.kernel.org/netdev/20250702-vsock-transports-toctou-v3-0-0a7e2e692987@rbox.co/

Thanks,
Michal
© 2016 - 2025 Red Hat, Inc.