fs/nfsd/nfs4state.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
Before calling nfsd4_run_cb to queue dl_recall to the callback_wq, we
increment the reference count of dl_stid.
We expect that after the corresponding work_struct is processed, the
reference count of dl_stid will be decremented through the callback
function nfsd4_cb_recall_release.
However, if the call to nfsd4_run_cb fails, the incremented reference
count of dl_stid will not be decremented correspondingly, leading to the
following nfs4_stid leak:
unreferenced object 0xffff88812067b578 (size 344):
comm "nfsd", pid 2761, jiffies 4295044002 (age 5541.241s)
hex dump (first 32 bytes):
01 00 00 00 6b 6b 6b 6b b8 02 c0 e2 81 88 ff ff ....kkkk........
00 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 ad 4e ad de .kkkkkkk.....N..
backtrace:
kmem_cache_alloc+0x4b9/0x700
nfsd4_process_open1+0x34/0x300
nfsd4_open+0x2d1/0x9d0
nfsd4_proc_compound+0x7a2/0xe30
nfsd_dispatch+0x241/0x3e0
svc_process_common+0x5d3/0xcc0
svc_process+0x2a3/0x320
nfsd+0x180/0x2e0
kthread+0x199/0x1d0
ret_from_fork+0x30/0x50
ret_from_fork_asm+0x1b/0x30
unreferenced object 0xffff8881499f4d28 (size 368):
comm "nfsd", pid 2761, jiffies 4295044005 (age 5541.239s)
hex dump (first 32 bytes):
01 00 00 00 00 00 00 00 30 4d 9f 49 81 88 ff ff ........0M.I....
30 4d 9f 49 81 88 ff ff 20 00 00 00 01 00 00 00 0M.I.... .......
backtrace:
kmem_cache_alloc+0x4b9/0x700
nfs4_alloc_stid+0x29/0x210
alloc_init_deleg+0x92/0x2e0
nfs4_set_delegation+0x284/0xc00
nfs4_open_delegation+0x216/0x3f0
nfsd4_process_open2+0x2b3/0xee0
nfsd4_open+0x770/0x9d0
nfsd4_proc_compound+0x7a2/0xe30
nfsd_dispatch+0x241/0x3e0
svc_process_common+0x5d3/0xcc0
svc_process+0x2a3/0x320
nfsd+0x180/0x2e0
kthread+0x199/0x1d0
ret_from_fork+0x30/0x50
ret_from_fork_asm+0x1b/0x30
Fix it by checking the result of nfsd4_run_cb and call nfs4_put_stid if
fail to queue dl_recall.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
---
fs/nfsd/nfs4state.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 153eeea2c7c9..0ccb87be47b7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5414,6 +5414,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
+ bool queued;
/*
* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
@@ -5422,7 +5423,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
* we know it's safe to take a reference.
*/
refcount_inc(&dp->dl_stid.sc_count);
- WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
+ queued = nfsd4_run_cb(&dp->dl_recall);
+ WARN_ON_ONCE(!queued);
+ if (!queued)
+ nfs4_put_stid(&dp->dl_stid);
}
/* Called from break_lease() with flc_lock held. */
--
2.31.1
On Thu, 2025-02-13 at 15:25 +0800, Li Lingfeng wrote:
> Before calling nfsd4_run_cb to queue dl_recall to the callback_wq, we
> increment the reference count of dl_stid.
> We expect that after the corresponding work_struct is processed, the
> reference count of dl_stid will be decremented through the callback
> function nfsd4_cb_recall_release.
> However, if the call to nfsd4_run_cb fails, the incremented reference
> count of dl_stid will not be decremented correspondingly, leading to the
> following nfs4_stid leak:
> unreferenced object 0xffff88812067b578 (size 344):
> comm "nfsd", pid 2761, jiffies 4295044002 (age 5541.241s)
> hex dump (first 32 bytes):
> 01 00 00 00 6b 6b 6b 6b b8 02 c0 e2 81 88 ff ff ....kkkk........
> 00 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 ad 4e ad de .kkkkkkk.....N..
> backtrace:
> kmem_cache_alloc+0x4b9/0x700
> nfsd4_process_open1+0x34/0x300
> nfsd4_open+0x2d1/0x9d0
> nfsd4_proc_compound+0x7a2/0xe30
> nfsd_dispatch+0x241/0x3e0
> svc_process_common+0x5d3/0xcc0
> svc_process+0x2a3/0x320
> nfsd+0x180/0x2e0
> kthread+0x199/0x1d0
> ret_from_fork+0x30/0x50
> ret_from_fork_asm+0x1b/0x30
> unreferenced object 0xffff8881499f4d28 (size 368):
> comm "nfsd", pid 2761, jiffies 4295044005 (age 5541.239s)
> hex dump (first 32 bytes):
> 01 00 00 00 00 00 00 00 30 4d 9f 49 81 88 ff ff ........0M.I....
> 30 4d 9f 49 81 88 ff ff 20 00 00 00 01 00 00 00 0M.I.... .......
> backtrace:
> kmem_cache_alloc+0x4b9/0x700
> nfs4_alloc_stid+0x29/0x210
> alloc_init_deleg+0x92/0x2e0
> nfs4_set_delegation+0x284/0xc00
> nfs4_open_delegation+0x216/0x3f0
> nfsd4_process_open2+0x2b3/0xee0
> nfsd4_open+0x770/0x9d0
> nfsd4_proc_compound+0x7a2/0xe30
> nfsd_dispatch+0x241/0x3e0
> svc_process_common+0x5d3/0xcc0
> svc_process+0x2a3/0x320
> nfsd+0x180/0x2e0
> kthread+0x199/0x1d0
> ret_from_fork+0x30/0x50
> ret_from_fork_asm+0x1b/0x30
> Fix it by checking the result of nfsd4_run_cb and call nfs4_put_stid if
> fail to queue dl_recall.
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
> ---
> fs/nfsd/nfs4state.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 153eeea2c7c9..0ccb87be47b7 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -5414,6 +5414,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
>
> static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
> {
> + bool queued;
> /*
> * We're assuming the state code never drops its reference
> * without first removing the lease. Since we're in this lease
> @@ -5422,7 +5423,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
> * we know it's safe to take a reference.
> */
> refcount_inc(&dp->dl_stid.sc_count);
> - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
> + queued = nfsd4_run_cb(&dp->dl_recall);
> + WARN_ON_ONCE(!queued);
> + if (!queued)
> + nfs4_put_stid(&dp->dl_stid);
> }
>
> /* Called from break_lease() with flc_lock held. */
Have you actually seen the WARN_ON_ONCE() pop under normal usage, or
was the problem you reproduced done via fault injection?
Unfortunately, this won't work. nfsd_break_one_deleg() is called from
the ->break_lease callback with the flc->flc_lock held, and
nfs4_put_stid can sleep in the sc_free callbacks.
--
Jeff Layton <jlayton@kernel.org>
在 2025/2/13 19:49, Jeff Layton 写道:
> On Thu, 2025-02-13 at 15:25 +0800, Li Lingfeng wrote:
>> Before calling nfsd4_run_cb to queue dl_recall to the callback_wq, we
>> increment the reference count of dl_stid.
>> We expect that after the corresponding work_struct is processed, the
>> reference count of dl_stid will be decremented through the callback
>> function nfsd4_cb_recall_release.
>> However, if the call to nfsd4_run_cb fails, the incremented reference
>> count of dl_stid will not be decremented correspondingly, leading to the
>> following nfs4_stid leak:
>> unreferenced object 0xffff88812067b578 (size 344):
>> comm "nfsd", pid 2761, jiffies 4295044002 (age 5541.241s)
>> hex dump (first 32 bytes):
>> 01 00 00 00 6b 6b 6b 6b b8 02 c0 e2 81 88 ff ff ....kkkk........
>> 00 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 ad 4e ad de .kkkkkkk.....N..
>> backtrace:
>> kmem_cache_alloc+0x4b9/0x700
>> nfsd4_process_open1+0x34/0x300
>> nfsd4_open+0x2d1/0x9d0
>> nfsd4_proc_compound+0x7a2/0xe30
>> nfsd_dispatch+0x241/0x3e0
>> svc_process_common+0x5d3/0xcc0
>> svc_process+0x2a3/0x320
>> nfsd+0x180/0x2e0
>> kthread+0x199/0x1d0
>> ret_from_fork+0x30/0x50
>> ret_from_fork_asm+0x1b/0x30
>> unreferenced object 0xffff8881499f4d28 (size 368):
>> comm "nfsd", pid 2761, jiffies 4295044005 (age 5541.239s)
>> hex dump (first 32 bytes):
>> 01 00 00 00 00 00 00 00 30 4d 9f 49 81 88 ff ff ........0M.I....
>> 30 4d 9f 49 81 88 ff ff 20 00 00 00 01 00 00 00 0M.I.... .......
>> backtrace:
>> kmem_cache_alloc+0x4b9/0x700
>> nfs4_alloc_stid+0x29/0x210
>> alloc_init_deleg+0x92/0x2e0
>> nfs4_set_delegation+0x284/0xc00
>> nfs4_open_delegation+0x216/0x3f0
>> nfsd4_process_open2+0x2b3/0xee0
>> nfsd4_open+0x770/0x9d0
>> nfsd4_proc_compound+0x7a2/0xe30
>> nfsd_dispatch+0x241/0x3e0
>> svc_process_common+0x5d3/0xcc0
>> svc_process+0x2a3/0x320
>> nfsd+0x180/0x2e0
>> kthread+0x199/0x1d0
>> ret_from_fork+0x30/0x50
>> ret_from_fork_asm+0x1b/0x30
>> Fix it by checking the result of nfsd4_run_cb and call nfs4_put_stid if
>> fail to queue dl_recall.
>>
>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
>> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
>> ---
>> fs/nfsd/nfs4state.c | 6 +++++-
>> 1 file changed, 5 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 153eeea2c7c9..0ccb87be47b7 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -5414,6 +5414,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
>>
>> static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
>> {
>> + bool queued;
>> /*
>> * We're assuming the state code never drops its reference
>> * without first removing the lease. Since we're in this lease
>> @@ -5422,7 +5423,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
>> * we know it's safe to take a reference.
>> */
>> refcount_inc(&dp->dl_stid.sc_count);
>> - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
>> + queued = nfsd4_run_cb(&dp->dl_recall);
>> + WARN_ON_ONCE(!queued);
>> + if (!queued)
>> + nfs4_put_stid(&dp->dl_stid);
>> }
>>
>> /* Called from break_lease() with flc_lock held. */
>
> Have you actually seen the WARN_ON_ONCE() pop under normal usage, or
> was the problem you reproduced done via fault injection?
Hi,
I add mdelay for cb_work before processing it and add msleep in
nfs_do_return_delegation to reproduce it.
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1188,6 +1188,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct
nfs4_file *fp,
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
+ dp->dl_recall.cb_work.debug_flag = 0x1234;
+ printk("%s init work func %px\n", __func__,
dp->dl_recall.cb_work.func);
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp;
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2708,6 +2708,12 @@ static void process_scheduled_works(struct worker
*worker)
worker->pool->watchdog_ts = jiffies;
first = false;
}
+ if (work->debug_flag == 0x1234) {
+ printk("%s func %px\n", __func__, work->func);
+ printk("%s delay before clear pending...\n", __func__);
+ mdelay(10 * 1000);
+ printk("%s delay done\n", __func__);
+ }
process_one_work(worker, work);
}
}
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -263,6 +263,15 @@ static int nfs_do_return_delegation(struct inode
*inode, struct nfs_delegation *
const struct cred *cred;
int res = 0;
+ printk("sleep before deleg return...\n");
+ while (1) {
+ ifdebug(PROC)
+ msleep(10 * 1000);
+ else
+ break;
+ }
+ printk("sleep done\n");
+
if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
spin_lock(&delegation->lock);
cred = get_cred(delegation->cred);
> Unfortunately, this won't work. nfsd_break_one_deleg() is called from
> the ->break_lease callback with the flc->flc_lock held, and
> nfs4_put_stid can sleep in the sc_free callbacks.
Are you referring to nfs4_free_deleg? I didn't quite see where it might
sleep. Could you please explain it to me?
Thanks.
On Thu, 2025-02-13 at 20:28 +0800, Li Lingfeng wrote:
> 在 2025/2/13 19:49, Jeff Layton 写道:
> > On Thu, 2025-02-13 at 15:25 +0800, Li Lingfeng wrote:
> > > Before calling nfsd4_run_cb to queue dl_recall to the callback_wq, we
> > > increment the reference count of dl_stid.
> > > We expect that after the corresponding work_struct is processed, the
> > > reference count of dl_stid will be decremented through the callback
> > > function nfsd4_cb_recall_release.
> > > However, if the call to nfsd4_run_cb fails, the incremented reference
> > > count of dl_stid will not be decremented correspondingly, leading to the
> > > following nfs4_stid leak:
> > > unreferenced object 0xffff88812067b578 (size 344):
> > > comm "nfsd", pid 2761, jiffies 4295044002 (age 5541.241s)
> > > hex dump (first 32 bytes):
> > > 01 00 00 00 6b 6b 6b 6b b8 02 c0 e2 81 88 ff ff ....kkkk........
> > > 00 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 ad 4e ad de .kkkkkkk.....N..
> > > backtrace:
> > > kmem_cache_alloc+0x4b9/0x700
> > > nfsd4_process_open1+0x34/0x300
> > > nfsd4_open+0x2d1/0x9d0
> > > nfsd4_proc_compound+0x7a2/0xe30
> > > nfsd_dispatch+0x241/0x3e0
> > > svc_process_common+0x5d3/0xcc0
> > > svc_process+0x2a3/0x320
> > > nfsd+0x180/0x2e0
> > > kthread+0x199/0x1d0
> > > ret_from_fork+0x30/0x50
> > > ret_from_fork_asm+0x1b/0x30
> > > unreferenced object 0xffff8881499f4d28 (size 368):
> > > comm "nfsd", pid 2761, jiffies 4295044005 (age 5541.239s)
> > > hex dump (first 32 bytes):
> > > 01 00 00 00 00 00 00 00 30 4d 9f 49 81 88 ff ff ........0M.I....
> > > 30 4d 9f 49 81 88 ff ff 20 00 00 00 01 00 00 00 0M.I.... .......
> > > backtrace:
> > > kmem_cache_alloc+0x4b9/0x700
> > > nfs4_alloc_stid+0x29/0x210
> > > alloc_init_deleg+0x92/0x2e0
> > > nfs4_set_delegation+0x284/0xc00
> > > nfs4_open_delegation+0x216/0x3f0
> > > nfsd4_process_open2+0x2b3/0xee0
> > > nfsd4_open+0x770/0x9d0
> > > nfsd4_proc_compound+0x7a2/0xe30
> > > nfsd_dispatch+0x241/0x3e0
> > > svc_process_common+0x5d3/0xcc0
> > > svc_process+0x2a3/0x320
> > > nfsd+0x180/0x2e0
> > > kthread+0x199/0x1d0
> > > ret_from_fork+0x30/0x50
> > > ret_from_fork_asm+0x1b/0x30
> > > Fix it by checking the result of nfsd4_run_cb and call nfs4_put_stid if
> > > fail to queue dl_recall.
> > >
> > > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > > Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
> > > ---
> > > fs/nfsd/nfs4state.c | 6 +++++-
> > > 1 file changed, 5 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index 153eeea2c7c9..0ccb87be47b7 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -5414,6 +5414,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
> > >
> > > static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
> > > {
> > > + bool queued;
> > > /*
> > > * We're assuming the state code never drops its reference
> > > * without first removing the lease. Since we're in this lease
> > > @@ -5422,7 +5423,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
> > > * we know it's safe to take a reference.
> > > */
> > > refcount_inc(&dp->dl_stid.sc_count);
> > > - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
> > > + queued = nfsd4_run_cb(&dp->dl_recall);
> > > + WARN_ON_ONCE(!queued);
> > > + if (!queued)
> > > + nfs4_put_stid(&dp->dl_stid);
> > > }
> > >
> > > /* Called from break_lease() with flc_lock held. */
> >
> > Have you actually seen the WARN_ON_ONCE() pop under normal usage, or
> > was the problem you reproduced done via fault injection?
>
> Hi,
>
> I add mdelay for cb_work before processing it and add msleep in
> nfs_do_return_delegation to reproduce it.
>
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1188,6 +1188,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct
> nfs4_file *fp,
> dp->dl_recalled = false;
> nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
> &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
> + dp->dl_recall.cb_work.debug_flag = 0x1234;
> + printk("%s init work func %px\n", __func__,
> dp->dl_recall.cb_work.func);
> get_nfs4_file(fp);
> dp->dl_stid.sc_file = fp;
> return dp;
>
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -2708,6 +2708,12 @@ static void process_scheduled_works(struct worker
> *worker)
> worker->pool->watchdog_ts = jiffies;
> first = false;
> }
> + if (work->debug_flag == 0x1234) {
> + printk("%s func %px\n", __func__, work->func);
> + printk("%s delay before clear pending...\n", __func__);
> + mdelay(10 * 1000);
> + printk("%s delay done\n", __func__);
> + }
> process_one_work(worker, work);
> }
> }
>
> --- a/fs/nfs/delegation.c
> +++ b/fs/nfs/delegation.c
> @@ -263,6 +263,15 @@ static int nfs_do_return_delegation(struct inode
> *inode, struct nfs_delegation *
> const struct cred *cred;
> int res = 0;
>
> + printk("sleep before deleg return...\n");
> + while (1) {
> + ifdebug(PROC)
> + msleep(10 * 1000);
> + else
> + break;
> + }
> + printk("sleep done\n");
> +
> if (!test_bit(NFS_DELEGATION_REVOKED, &delegation-flags)) {
> spin_lock(&delegation->lock);
> cred = get_cred(delegation->cred);
>
> > Unfortunately, this won't work. nfsd_break_one_deleg() is called from
> > the ->break_lease callback with the flc->flc_lock held, and
> > nfs4_put_stid can sleep in the sc_free callbacks.
>
> Are you referring to nfs4_free_deleg? I didn't quite see where it might
> sleep. Could you please explain it to me?
>
I stand corrected. nfs4_free_deleg() won't sleep, but
nfs4_free_lock_stateid() can. In this case, you know that this is a
deleg stateid, so this should actually be safe after all.
If you do this though, please add a comment to nfs4_free_deleg()
mentioning that it mustn't ever sleep, since it can be called from that
context.
--
Jeff Layton <jlayton@kernel.org>
在 2025/2/13 20:34, Jeff Layton 写道:
> On Thu, 2025-02-13 at 20:28 +0800, Li Lingfeng wrote:
>> 在 2025/2/13 19:49, Jeff Layton 写道:
>>> On Thu, 2025-02-13 at 15:25 +0800, Li Lingfeng wrote:
>>>> Before calling nfsd4_run_cb to queue dl_recall to the callback_wq, we
>>>> increment the reference count of dl_stid.
>>>> We expect that after the corresponding work_struct is processed, the
>>>> reference count of dl_stid will be decremented through the callback
>>>> function nfsd4_cb_recall_release.
>>>> However, if the call to nfsd4_run_cb fails, the incremented reference
>>>> count of dl_stid will not be decremented correspondingly, leading to the
>>>> following nfs4_stid leak:
>>>> unreferenced object 0xffff88812067b578 (size 344):
>>>> comm "nfsd", pid 2761, jiffies 4295044002 (age 5541.241s)
>>>> hex dump (first 32 bytes):
>>>> 01 00 00 00 6b 6b 6b 6b b8 02 c0 e2 81 88 ff ff ....kkkk........
>>>> 00 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 ad 4e ad de .kkkkkkk.....N..
>>>> backtrace:
>>>> kmem_cache_alloc+0x4b9/0x700
>>>> nfsd4_process_open1+0x34/0x300
>>>> nfsd4_open+0x2d1/0x9d0
>>>> nfsd4_proc_compound+0x7a2/0xe30
>>>> nfsd_dispatch+0x241/0x3e0
>>>> svc_process_common+0x5d3/0xcc0
>>>> svc_process+0x2a3/0x320
>>>> nfsd+0x180/0x2e0
>>>> kthread+0x199/0x1d0
>>>> ret_from_fork+0x30/0x50
>>>> ret_from_fork_asm+0x1b/0x30
>>>> unreferenced object 0xffff8881499f4d28 (size 368):
>>>> comm "nfsd", pid 2761, jiffies 4295044005 (age 5541.239s)
>>>> hex dump (first 32 bytes):
>>>> 01 00 00 00 00 00 00 00 30 4d 9f 49 81 88 ff ff ........0M.I....
>>>> 30 4d 9f 49 81 88 ff ff 20 00 00 00 01 00 00 00 0M.I.... .......
>>>> backtrace:
>>>> kmem_cache_alloc+0x4b9/0x700
>>>> nfs4_alloc_stid+0x29/0x210
>>>> alloc_init_deleg+0x92/0x2e0
>>>> nfs4_set_delegation+0x284/0xc00
>>>> nfs4_open_delegation+0x216/0x3f0
>>>> nfsd4_process_open2+0x2b3/0xee0
>>>> nfsd4_open+0x770/0x9d0
>>>> nfsd4_proc_compound+0x7a2/0xe30
>>>> nfsd_dispatch+0x241/0x3e0
>>>> svc_process_common+0x5d3/0xcc0
>>>> svc_process+0x2a3/0x320
>>>> nfsd+0x180/0x2e0
>>>> kthread+0x199/0x1d0
>>>> ret_from_fork+0x30/0x50
>>>> ret_from_fork_asm+0x1b/0x30
>>>> Fix it by checking the result of nfsd4_run_cb and call nfs4_put_stid if
>>>> fail to queue dl_recall.
>>>>
>>>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
>>>> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
>>>> ---
>>>> fs/nfsd/nfs4state.c | 6 +++++-
>>>> 1 file changed, 5 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>> index 153eeea2c7c9..0ccb87be47b7 100644
>>>> --- a/fs/nfsd/nfs4state.c
>>>> +++ b/fs/nfsd/nfs4state.c
>>>> @@ -5414,6 +5414,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
>>>>
>>>> static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
>>>> {
>>>> + bool queued;
>>>> /*
>>>> * We're assuming the state code never drops its reference
>>>> * without first removing the lease. Since we're in this lease
>>>> @@ -5422,7 +5423,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
>>>> * we know it's safe to take a reference.
>>>> */
>>>> refcount_inc(&dp->dl_stid.sc_count);
>>>> - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
>>>> + queued = nfsd4_run_cb(&dp->dl_recall);
>>>> + WARN_ON_ONCE(!queued);
>>>> + if (!queued)
>>>> + nfs4_put_stid(&dp->dl_stid);
>>>> }
>>>>
>>>> /* Called from break_lease() with flc_lock held. */
>>> Have you actually seen the WARN_ON_ONCE() pop under normal usage, or
>>> was the problem you reproduced done via fault injection?
>> Hi,
>>
>> I add mdelay for cb_work before processing it and add msleep in
>> nfs_do_return_delegation to reproduce it.
>>
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -1188,6 +1188,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct
>> nfs4_file *fp,
>> dp->dl_recalled = false;
>> nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
>> &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
>> + dp->dl_recall.cb_work.debug_flag = 0x1234;
>> + printk("%s init work func %px\n", __func__,
>> dp->dl_recall.cb_work.func);
>> get_nfs4_file(fp);
>> dp->dl_stid.sc_file = fp;
>> return dp;
>>
>> --- a/kernel/workqueue.c
>> +++ b/kernel/workqueue.c
>> @@ -2708,6 +2708,12 @@ static void process_scheduled_works(struct worker
>> *worker)
>> worker->pool->watchdog_ts = jiffies;
>> first = false;
>> }
>> + if (work->debug_flag == 0x1234) {
>> + printk("%s func %px\n", __func__, work->func);
>> + printk("%s delay before clear pending...\n", __func__);
>> + mdelay(10 * 1000);
>> + printk("%s delay done\n", __func__);
>> + }
>> process_one_work(worker, work);
>> }
>> }
>>
>> --- a/fs/nfs/delegation.c
>> +++ b/fs/nfs/delegation.c
>> @@ -263,6 +263,15 @@ static int nfs_do_return_delegation(struct inode
>> *inode, struct nfs_delegation *
>> const struct cred *cred;
>> int res = 0;
>>
>> + printk("sleep before deleg return...\n");
>> + while (1) {
>> + ifdebug(PROC)
>> + msleep(10 * 1000);
>> + else
>> + break;
>> + }
>> + printk("sleep done\n");
>> +
>> if (!test_bit(NFS_DELEGATION_REVOKED, &delegation-flags)) {
>> spin_lock(&delegation->lock);
>> cred = get_cred(delegation->cred);
>>
>>> Unfortunately, this won't work. nfsd_break_one_deleg() is called from
>>> the ->break_lease callback with the flc->flc_lock held, and
>>> nfs4_put_stid can sleep in the sc_free callbacks.
>> Are you referring to nfs4_free_deleg? I didn't quite see where it might
>> sleep. Could you please explain it to me?
>>
> I stand corrected. nfs4_free_deleg() won't sleep, but
> nfs4_free_lock_stateid() can. In this case, you know that this is a
> deleg stateid, so this should actually be safe after all.
>
> If you do this though, please add a comment to nfs4_free_deleg()
> mentioning that it mustn't ever sleep, since it can be called from that
> context.
Thank you for your advice, I will send v2 soon.
© 2016 - 2025 Red Hat, Inc.