fs/nfsd/nfs4recover.c | 8 ++++++++ fs/nfsd/nfs4state.c | 4 ++++ fs/nfsd/nfsctl.c | 2 ++ 3 files changed, 14 insertions(+)
Deciding whether tracking callbacks may be invoked solely by testing
nn->client_tracking_ops for NULL is racy: the pointer is set before
tracking initialization completes, so a callback can run against
resources that are still uninitialized or were already freed on an
error path (UAF, NULL pointer dereference). Examples:
1) nfsd4_client_tracking_init
// set nn->client_tracking_ops
nfsd4_cld_tracking_init
nfs4_cld_state_init
nn->reclaim_str_hashtbl = kmalloc_array
... // error path, goto err
nfs4_cld_state_shutdown
kfree(nn->reclaim_str_hashtbl)
write_v4_end_grace
nfsd4_end_grace
nfsd4_record_grace_done
nfsd4_cld_grace_done
nfs4_release_reclaim
nn->reclaim_str_hashtbl[i]
// UAF
// clear nn->client_tracking_ops
2) nfsd4_client_tracking_init
// set nn->client_tracking_ops
nfsd4_cld_tracking_init
write_v4_end_grace
nfsd4_end_grace
nfsd4_record_grace_done
nfsd4_cld_grace_done
alloc_cld_upcall
cn = nn->cld_net
spin_lock // cn->cn_lock
// NULL deref
// error path, skip init pipe
__nfsd4_init_cld_pipe
cn = kzalloc
nn->cld_net = cn
// clear nn->client_tracking_ops
Once the nfsd filesystem is mounted, users can trigger the grace_done
callback by writing to /proc/fs/nfsd/v4_end_grace. If the tracking
resources are still uninitialized, or have been freed on an error path,
this causes access violations.
Resolve the issue by taking nfsd_mutex around the tracking callbacks to
prevent this concurrency.
Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net")
Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
---
Changes in v2:
Use nfsd_mutex instead of adding a new flag to prevent concurrency.
fs/nfsd/nfs4recover.c | 8 ++++++++
fs/nfsd/nfs4state.c | 4 ++++
fs/nfsd/nfsctl.c | 2 ++
3 files changed, 14 insertions(+)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 82785db730d9..8ac089f8134c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -162,7 +162,9 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error)
if (error == -ENOENT) {
printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
"Reboot recovery will not function correctly!\n");
+ mutex_lock(&nfsd_mutex);
nfsd4_client_tracking_exit(clp->net);
+ mutex_unlock(&nfsd_mutex);
}
}
@@ -2083,8 +2085,10 @@ nfsd4_client_record_create(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ mutex_lock(&nfsd_mutex);
if (nn->client_tracking_ops)
nn->client_tracking_ops->create(clp);
+ mutex_unlock(&nfsd_mutex);
}
void
@@ -2092,8 +2096,10 @@ nfsd4_client_record_remove(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ mutex_lock(&nfsd_mutex);
if (nn->client_tracking_ops)
nn->client_tracking_ops->remove(clp);
+ mutex_unlock(&nfsd_mutex);
}
int
@@ -2101,8 +2107,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ mutex_lock(&nfsd_mutex);
if (nn->client_tracking_ops)
return nn->client_tracking_ops->check(clp);
+ mutex_unlock(&nfsd_mutex);
return -EOPNOTSUPP;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d5694987f86f..2794fdc8b678 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2529,7 +2529,9 @@ static void inc_reclaim_complete(struct nfs4_client *clp)
nn->reclaim_str_hashtbl_size) {
printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
clp->net->ns.inum);
+ mutex_lock(&nfsd_mutex);
nfsd4_end_grace(nn);
+ mutex_unlock(&nfsd_mutex);
}
}
@@ -6773,7 +6775,9 @@ nfs4_laundromat(struct nfsd_net *nn)
lt.new_timeo = 0;
goto out;
}
+ mutex_lock(&nfsd_mutex);
nfsd4_end_grace(nn);
+ mutex_unlock(&nfsd_mutex);
spin_lock(&nn->s2s_cp_lock);
idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3f3e9f6c4250..649850b4bb60 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1085,7 +1085,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
if (!nn->nfsd_serv)
return -EBUSY;
trace_nfsd_end_grace(netns(file));
+ mutex_lock(&nfsd_mutex);
nfsd4_end_grace(nn);
+ mutex_lock(&nfsd_mutex);
break;
default:
return -EINVAL;
--
2.46.1
On Thu, 12 Jun 2025, Li Lingfeng wrote:
> Checking whether tracking callbacks can be called based on whether
> nn->client_tracking_ops is NULL may lead to callbacks being invoked
> before tracking initialization completes, causing resource access
> violations (UAF, NULL pointer dereference). Examples:
>
> 1) nfsd4_client_tracking_init
> // set nn->client_tracking_ops
> nfsd4_cld_tracking_init
> nfs4_cld_state_init
> nn->reclaim_str_hashtbl = kmalloc_array
> ... // error path, goto err
> nfs4_cld_state_shutdown
> kfree(nn->reclaim_str_hashtbl)
> write_v4_end_grace
I suspect the problem here is that write_v4_end_grace() is one of the
few write_op functions which doesn't take nfsd_mutex. It should hold
that lock while testing ->nfsd_serv and calling nfsd4_end_grace().
write_filehandle(), write_unlock_ip(), and write_unlock_fs() also don't
take that lock. They don't even check if nfsd_serv is NULL. I suspect
they all should.
NeilBrown
> nfsd4_end_grace
> nfsd4_record_grace_done
> nfsd4_cld_grace_done
> nfs4_release_reclaim
> nn->reclaim_str_hashtbl[i]
> // UAF
> // clear nn->client_tracking_ops
>
> 2) nfsd4_client_tracking_init
> // set nn->client_tracking_ops
> nfsd4_cld_tracking_init
> write_v4_end_grace
> nfsd4_end_grace
> nfsd4_record_grace_done
> nfsd4_cld_grace_done
> alloc_cld_upcall
> cn = nn->cld_net
> spin_lock // cn->cn_lock
> // NULL deref
> // error path, skip init pipe
> __nfsd4_init_cld_pipe
> cn = kzalloc
> nn->cld_net = cn
> // clear nn->client_tracking_ops
>
> After nfsd mounts, users can trigger grace_done callbacks via
> /proc/fs/nfsd/v4_end_grace. If resources are uninitialized or freed
> in error paths, this causes access violations.
>
> Resolve the issue by leveraging nfsd_mutex to prevent concurrency.
>
> Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net")
> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
> ---
> Changes in v2:
> Use nfsd_mutex instead of adding a new flag to prevent concurrency.
> fs/nfsd/nfs4recover.c | 8 ++++++++ > fs/nfsd/nfs4state.c | 4 ++++ > fs/nfsd/nfsctl.c | 2 ++ > 3 files changed, 14 insertions(+) > > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c > index 82785db730d9..8ac089f8134c 100644 > --- a/fs/nfsd/nfs4recover.c > +++ b/fs/nfsd/nfs4recover.c > @@ -162,7 +162,9 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error) > if (error == -ENOENT) { > printk(KERN_ERR "NFSD: disabling legacy clientid tracking. " > "Reboot recovery will not function correctly!\n"); > + mutex_lock(&nfsd_mutex); > nfsd4_client_tracking_exit(clp->net); > + mutex_unlock(&nfsd_mutex); > } > } > > @@ -2083,8 +2085,10 @@ nfsd4_client_record_create(struct nfs4_client *clp) > { > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > + mutex_lock(&nfsd_mutex); > if (nn->client_tracking_ops) > nn->client_tracking_ops->create(clp); > + mutex_unlock(&nfsd_mutex); > } > > void > @@ -2092,8 +2096,10 @@ nfsd4_client_record_remove(struct nfs4_client *clp) > { > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > + mutex_lock(&nfsd_mutex); > if (nn->client_tracking_ops) > nn->client_tracking_ops->remove(clp); > + mutex_unlock(&nfsd_mutex); > } > > int > @@ -2101,8 +2107,10 @@ nfsd4_client_record_check(struct nfs4_client *clp) > { > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > + mutex_lock(&nfsd_mutex); > if (nn->client_tracking_ops) > return nn->client_tracking_ops->check(clp); > + mutex_unlock(&nfsd_mutex); > > return -EOPNOTSUPP; > } > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index d5694987f86f..2794fdc8b678 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -2529,7 +2529,9 @@ static void inc_reclaim_complete(struct nfs4_client *clp) > nn->reclaim_str_hashtbl_size) { > printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n", > clp->net->ns.inum); > + mutex_lock(&nfsd_mutex); > nfsd4_end_grace(nn); > + mutex_unlock(&nfsd_mutex); > } > } > > @@ 
-6773,7 +6775,9 @@ nfs4_laundromat(struct nfsd_net *nn) > lt.new_timeo = 0; > goto out; > } > + mutex_lock(&nfsd_mutex); > nfsd4_end_grace(nn); > + mutex_unlock(&nfsd_mutex); > > spin_lock(&nn->s2s_cp_lock); > idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index 3f3e9f6c4250..649850b4bb60 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -1085,7 +1085,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) > if (!nn->nfsd_serv) > return -EBUSY; > trace_nfsd_end_grace(netns(file)); > + mutex_lock(&nfsd_mutex); > nfsd4_end_grace(nn); > + mutex_lock(&nfsd_mutex); > break; > default: > return -EINVAL; > -- > 2.46.1 > >
On Thu, 2025-06-12 at 11:55 +0800, Li Lingfeng wrote: > Checking whether tracking callbacks can be called based on whether > nn->client_tracking_ops is NULL may lead to callbacks being invoked > before tracking initialization completes, causing resource access > violations (UAF, NULL pointer dereference). Examples: > > 1) nfsd4_client_tracking_init > // set nn->client_tracking_ops > nfsd4_cld_tracking_init > nfs4_cld_state_init > nn->reclaim_str_hashtbl = kmalloc_array > ... // error path, goto err > nfs4_cld_state_shutdown > kfree(nn->reclaim_str_hashtbl) > write_v4_end_grace > nfsd4_end_grace > nfsd4_record_grace_done > nfsd4_cld_grace_done > nfs4_release_reclaim > nn->reclaim_str_hashtbl[i] > // UAF > // clear nn->client_tracking_ops > > 2) nfsd4_client_tracking_init > // set nn->client_tracking_ops > nfsd4_cld_tracking_init > write_v4_end_grace > nfsd4_end_grace > nfsd4_record_grace_done > nfsd4_cld_grace_done > alloc_cld_upcall > cn = nn->cld_net > spin_lock // cn->cn_lock > // NULL deref > // error path, skip init pipe > __nfsd4_init_cld_pipe > cn = kzalloc > nn->cld_net = cn > // clear nn->client_tracking_ops > Have you seen this race in the wild? Looking at this more closely, I don't think this race is possible. You'd need to invoke the ->init routine concurrently from two different tasks, but nfsd4_client_tracking_init is called during net ns initialization, which should ensure that only one task invokes it. > After nfsd mounts, users can trigger grace_done callbacks via > /proc/fs/nfsd/v4_end_grace. If resources are uninitialized or freed > in error paths, this causes access violations. > > Resolve the issue by leveraging nfsd_mutex to prevent concurrency. > > Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net") > Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com> > --- > Changes in v2: > Use nfsd_mutex instead of adding a new flag to prevent concurrency. 
> fs/nfsd/nfs4recover.c | 8 ++++++++ > fs/nfsd/nfs4state.c | 4 ++++ > fs/nfsd/nfsctl.c | 2 ++ > 3 files changed, 14 insertions(+) > > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c > index 82785db730d9..8ac089f8134c 100644 > --- a/fs/nfsd/nfs4recover.c > +++ b/fs/nfsd/nfs4recover.c > @@ -162,7 +162,9 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error) > if (error == -ENOENT) { > printk(KERN_ERR "NFSD: disabling legacy clientid tracking. " > "Reboot recovery will not function correctly!\n"); > + mutex_lock(&nfsd_mutex); > nfsd4_client_tracking_exit(clp->net); > + mutex_unlock(&nfsd_mutex); > } > } > > @@ -2083,8 +2085,10 @@ nfsd4_client_record_create(struct nfs4_client *clp) > { > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > + mutex_lock(&nfsd_mutex); > if (nn->client_tracking_ops) > nn->client_tracking_ops->create(clp); > + mutex_unlock(&nfsd_mutex); > } > > void > @@ -2092,8 +2096,10 @@ nfsd4_client_record_remove(struct nfs4_client *clp) > { > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > + mutex_lock(&nfsd_mutex); > if (nn->client_tracking_ops) > nn->client_tracking_ops->remove(clp); > + mutex_unlock(&nfsd_mutex); > } > > int > @@ -2101,8 +2107,10 @@ nfsd4_client_record_check(struct nfs4_client *clp) > { > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > + mutex_lock(&nfsd_mutex); > if (nn->client_tracking_ops) > return nn->client_tracking_ops->check(clp); > + mutex_unlock(&nfsd_mutex); > > return -EOPNOTSUPP; > } > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index d5694987f86f..2794fdc8b678 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -2529,7 +2529,9 @@ static void inc_reclaim_complete(struct nfs4_client *clp) > nn->reclaim_str_hashtbl_size) { > printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n", > clp->net->ns.inum); > + mutex_lock(&nfsd_mutex); > nfsd4_end_grace(nn); > + mutex_unlock(&nfsd_mutex); > } > } > > @@ 
-6773,7 +6775,9 @@ nfs4_laundromat(struct nfsd_net *nn) > lt.new_timeo = 0; > goto out; > } > + mutex_lock(&nfsd_mutex); > nfsd4_end_grace(nn); > + mutex_unlock(&nfsd_mutex); > > spin_lock(&nn->s2s_cp_lock); > idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index 3f3e9f6c4250..649850b4bb60 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -1085,7 +1085,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) > if (!nn->nfsd_serv) > return -EBUSY; > trace_nfsd_end_grace(netns(file)); > + mutex_lock(&nfsd_mutex); > nfsd4_end_grace(nn); > + mutex_lock(&nfsd_mutex); > break; > default: > return -EINVAL; -- Jeff Layton <jlayton@kernel.org>
On Wed, 2025-06-18 at 08:35 -0400, Jeff Layton wrote: > On Thu, 2025-06-12 at 11:55 +0800, Li Lingfeng wrote: > > Checking whether tracking callbacks can be called based on whether > > nn->client_tracking_ops is NULL may lead to callbacks being invoked > > before tracking initialization completes, causing resource access > > violations (UAF, NULL pointer dereference). Examples: > > > > 1) nfsd4_client_tracking_init > > // set nn->client_tracking_ops > > nfsd4_cld_tracking_init > > nfs4_cld_state_init > > nn->reclaim_str_hashtbl = kmalloc_array > > ... // error path, goto err > > nfs4_cld_state_shutdown > > kfree(nn->reclaim_str_hashtbl) > > write_v4_end_grace > > nfsd4_end_grace > > nfsd4_record_grace_done > > nfsd4_cld_grace_done > > nfs4_release_reclaim > > nn->reclaim_str_hashtbl[i] > > // UAF > > // clear nn->client_tracking_ops > > > > 2) nfsd4_client_tracking_init > > // set nn->client_tracking_ops > > nfsd4_cld_tracking_init > > write_v4_end_grace > > nfsd4_end_grace > > nfsd4_record_grace_done > > nfsd4_cld_grace_done > > alloc_cld_upcall > > cn = nn->cld_net > > spin_lock // cn->cn_lock > > // NULL deref > > // error path, skip init pipe > > __nfsd4_init_cld_pipe > > cn = kzalloc > > nn->cld_net = cn > > // clear nn->client_tracking_ops > > > > > Have you seen this race in the wild? > > Looking at this more closely, I don't think this race is possible. > You'd need to invoke the ->init routine concurrently from two different > tasks, but nfsd4_client_tracking_init is called during net ns > initialization, which should ensure that only one task invokes it. > My bad. It's not called during net namespace initialization, but during server startup. But, the nfsd_mutex is held during this initialization, so I still don't think this race can happen. > > > > After nfsd mounts, users can trigger grace_done callbacks via > > /proc/fs/nfsd/v4_end_grace. If resources are uninitialized or freed > > in error paths, this causes access violations. 
> > > > Resolve the issue by leveraging nfsd_mutex to prevent concurrency. > > > > Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net") > > Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com> > > --- > > Changes in v2: > > Use nfsd_mutex instead of adding a new flag to prevent concurrency. > > fs/nfsd/nfs4recover.c | 8 ++++++++ > > fs/nfsd/nfs4state.c | 4 ++++ > > fs/nfsd/nfsctl.c | 2 ++ > > 3 files changed, 14 insertions(+) > > > > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c > > index 82785db730d9..8ac089f8134c 100644 > > --- a/fs/nfsd/nfs4recover.c > > +++ b/fs/nfsd/nfs4recover.c > > @@ -162,7 +162,9 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error) > > if (error == -ENOENT) { > > printk(KERN_ERR "NFSD: disabling legacy clientid tracking. " > > "Reboot recovery will not function correctly!\n"); > > + mutex_lock(&nfsd_mutex); > > nfsd4_client_tracking_exit(clp->net); > > + mutex_unlock(&nfsd_mutex); > > } > > } > > > > @@ -2083,8 +2085,10 @@ nfsd4_client_record_create(struct nfs4_client *clp) > > { > > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > > > + mutex_lock(&nfsd_mutex); > > if (nn->client_tracking_ops) > > nn->client_tracking_ops->create(clp); > > + mutex_unlock(&nfsd_mutex); > > } > > > > void > > @@ -2092,8 +2096,10 @@ nfsd4_client_record_remove(struct nfs4_client *clp) > > { > > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > > > + mutex_lock(&nfsd_mutex); > > if (nn->client_tracking_ops) > > nn->client_tracking_ops->remove(clp); > > + mutex_unlock(&nfsd_mutex); > > } > > > > int > > @@ -2101,8 +2107,10 @@ nfsd4_client_record_check(struct nfs4_client *clp) > > { > > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > > > + mutex_lock(&nfsd_mutex); > > if (nn->client_tracking_ops) > > return nn->client_tracking_ops->check(clp); > > + mutex_unlock(&nfsd_mutex); > > > > return -EOPNOTSUPP; > > } > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > index 
d5694987f86f..2794fdc8b678 100644 > > --- a/fs/nfsd/nfs4state.c > > +++ b/fs/nfsd/nfs4state.c > > @@ -2529,7 +2529,9 @@ static void inc_reclaim_complete(struct nfs4_client *clp) > > nn->reclaim_str_hashtbl_size) { > > printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n", > > clp->net->ns.inum); > > + mutex_lock(&nfsd_mutex); > > nfsd4_end_grace(nn); > > + mutex_unlock(&nfsd_mutex); > > } > > } > > > > @@ -6773,7 +6775,9 @@ nfs4_laundromat(struct nfsd_net *nn) > > lt.new_timeo = 0; > > goto out; > > } > > + mutex_lock(&nfsd_mutex); > > nfsd4_end_grace(nn); > > + mutex_unlock(&nfsd_mutex); > > > > spin_lock(&nn->s2s_cp_lock); > > idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > > index 3f3e9f6c4250..649850b4bb60 100644 > > --- a/fs/nfsd/nfsctl.c > > +++ b/fs/nfsd/nfsctl.c > > @@ -1085,7 +1085,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) > > if (!nn->nfsd_serv) > > return -EBUSY; > > trace_nfsd_end_grace(netns(file)); > > + mutex_lock(&nfsd_mutex); > > nfsd4_end_grace(nn); > > + mutex_lock(&nfsd_mutex); > > break; > > default: > > return -EINVAL; -- Jeff Layton <jlayton@kernel.org>
On Thu, 2025-06-12 at 11:55 +0800, Li Lingfeng wrote: > Checking whether tracking callbacks can be called based on whether > nn->client_tracking_ops is NULL may lead to callbacks being invoked > before tracking initialization completes, causing resource access > violations (UAF, NULL pointer dereference). Examples: > > 1) nfsd4_client_tracking_init > // set nn->client_tracking_ops > nfsd4_cld_tracking_init > nfs4_cld_state_init > nn->reclaim_str_hashtbl = kmalloc_array > ... // error path, goto err > nfs4_cld_state_shutdown > kfree(nn->reclaim_str_hashtbl) > write_v4_end_grace > nfsd4_end_grace > nfsd4_record_grace_done > nfsd4_cld_grace_done > nfs4_release_reclaim > nn->reclaim_str_hashtbl[i] > // UAF > // clear nn->client_tracking_ops > > 2) nfsd4_client_tracking_init > // set nn->client_tracking_ops > nfsd4_cld_tracking_init > write_v4_end_grace > nfsd4_end_grace > nfsd4_record_grace_done > nfsd4_cld_grace_done > alloc_cld_upcall > cn = nn->cld_net > spin_lock // cn->cn_lock > // NULL deref > // error path, skip init pipe > __nfsd4_init_cld_pipe > cn = kzalloc > nn->cld_net = cn > // clear nn->client_tracking_ops > > After nfsd mounts, users can trigger grace_done callbacks via > /proc/fs/nfsd/v4_end_grace. If resources are uninitialized or freed > in error paths, this causes access violations. > > Resolve the issue by leveraging nfsd_mutex to prevent concurrency. > > Fixes: 52e19c09a183 ("nfsd: make reclaim_str_hashtbl allocated per net") > Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com> > --- > Changes in v2: > Use nfsd_mutex instead of adding a new flag to prevent concurrency. 
> fs/nfsd/nfs4recover.c | 8 ++++++++ > fs/nfsd/nfs4state.c | 4 ++++ > fs/nfsd/nfsctl.c | 2 ++ > 3 files changed, 14 insertions(+) > > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c > index 82785db730d9..8ac089f8134c 100644 > --- a/fs/nfsd/nfs4recover.c > +++ b/fs/nfsd/nfs4recover.c > @@ -162,7 +162,9 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error) > if (error == -ENOENT) { > printk(KERN_ERR "NFSD: disabling legacy clientid tracking. " > "Reboot recovery will not function correctly!\n"); > + mutex_lock(&nfsd_mutex); > nfsd4_client_tracking_exit(clp->net); > + mutex_unlock(&nfsd_mutex); > } > } > > @@ -2083,8 +2085,10 @@ nfsd4_client_record_create(struct nfs4_client *clp) > { > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > + mutex_lock(&nfsd_mutex); > if (nn->client_tracking_ops) > nn->client_tracking_ops->create(clp); > + mutex_unlock(&nfsd_mutex); > } > > void > @@ -2092,8 +2096,10 @@ nfsd4_client_record_remove(struct nfs4_client *clp) > { > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > + mutex_lock(&nfsd_mutex); > if (nn->client_tracking_ops) > nn->client_tracking_ops->remove(clp); > + mutex_unlock(&nfsd_mutex); > } > > int > @@ -2101,8 +2107,10 @@ nfsd4_client_record_check(struct nfs4_client *clp) > { > struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); > > + mutex_lock(&nfsd_mutex); > if (nn->client_tracking_ops) > return nn->client_tracking_ops->check(clp); > + mutex_unlock(&nfsd_mutex); > > return -EOPNOTSUPP; > } > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index d5694987f86f..2794fdc8b678 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -2529,7 +2529,9 @@ static void inc_reclaim_complete(struct nfs4_client *clp) > nn->reclaim_str_hashtbl_size) { > printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n", > clp->net->ns.inum); > + mutex_lock(&nfsd_mutex); > nfsd4_end_grace(nn); > + mutex_unlock(&nfsd_mutex); > } > } > > @@ 
-6773,7 +6775,9 @@ nfs4_laundromat(struct nfsd_net *nn)
> lt.new_timeo = 0;
> goto out;
> }
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_unlock(&nfsd_mutex);
>
> spin_lock(&nn->s2s_cp_lock);
> idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index 3f3e9f6c4250..649850b4bb60 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -1085,7 +1085,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
> if (!nn->nfsd_serv)
> return -EBUSY;
> trace_nfsd_end_grace(netns(file));
> + mutex_lock(&nfsd_mutex);
> nfsd4_end_grace(nn);
> + mutex_lock(&nfsd_mutex);
> break;
> default:
> return -EINVAL;
This seems like a very heavyweight way to ensure that this doesn't
race, especially since the client tracking ops are per net-namespace
and the nfsd_mutex is global.
Also, how do you get two different tasks calling
nfsd4_client_tracking_init() at the same time? That's called when the
net namespace is set up, so there shouldn't be more than one copy
running.
I thought from an earlier patch that we were going to change this to
just ensure that the client_tracking_ops pointer did a NULL -> non-NULL
transition only once? I think that's sufficient to ensure that this
race can't occur.
--
Jeff Layton <jlayton@kernel.org>
© 2016 - 2025 Red Hat, Inc.