[v1] nfs: fix the race of lock/unlock and open

[PATCH] nfs: fix the race of lock/unlock and open

Posted by Li Lingfeng 9 months, 3 weeks ago

LOCK may extend an existing lock and release another one and UNLOCK may
also release an existing lock.
When opening a file, there may be access to file locks that have been
concurrently released by lock/unlock operations, potentially triggering
UAF.
While certain concurrent scenarios involving lock/unlock and open
operations have been safeguarded with locks – for example,
nfs4_proc_unlckz() acquires the so_delegreturn_mutex prior to invoking
locks_lock_inode_wait() – there remain cases where such protection is not
yet implemented.

The issue can be reproduced through the following steps:
T1: open in read-only mode with three consecutive lock operations applied
    lock1(0~100) --> add lock1 to file
    lock2(120~200) --> add lock2 to file
    lock3(50~150) --> extend lock1 to cover range 0~200 and release lock2
T2: restart nfs-server and run state manager
T3: open in write-only mode
    T1                            T2                                T3
                            start recover
lock1
lock2
                            nfs4_open_reclaim
                            clear_bit // NFS_DELEGATED_STATE
lock3
 _nfs4_proc_setlk
  lock so_delegreturn_mutex
  unlock so_delegreturn_mutex
  _nfs4_do_setlk
                            recover done
                                                lock so_delegreturn_mutex
                                                nfs_delegation_claim_locks
                                                get lock2
   rpc_run_task
   ...
   nfs4_lock_done
    locks_lock_inode_wait
    ...
     locks_dispose_list
     free lock2
                                                use lock2
                                                // UAF
                                                unlock so_delegreturn_mutex

Get so_delegreturn_mutex before calling locks_lock_inode_wait to fix this
issue.

Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
---
 fs/nfs/nfs4proc.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 970f28dbf253..297ee2442c02 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7112,13 +7112,16 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 		.inode = calldata->lsp->ls_state->inode,
 		.stateid = &calldata->arg.stateid,
 	};
+	struct nfs4_state_owner *sp = calldata->ctx->state->owner;
 
 	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
 		return;
 	switch (task->tk_status) {
 		case 0:
 			renew_lease(calldata->server, calldata->timestamp);
+			mutex_lock(&sp->so_delegreturn_mutex);
 			locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
+			mutex_unlock(&sp->so_delegreturn_mutex);
 			if (nfs4_update_lock_stateid(calldata->lsp,
 					&calldata->res.stateid))
 				break;
@@ -7375,6 +7378,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_lockdata *data = calldata;
 	struct nfs4_lock_state *lsp = data->lsp;
+	struct nfs4_state_owner *sp = data->ctx->state->owner;
 
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
 		return;
@@ -7386,8 +7390,12 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 				data->timestamp);
 		if (data->arg.new_lock && !data->cancelled) {
 			data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
-			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
+			mutex_lock(&sp->so_delegreturn_mutex);
+			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) {
+				mutex_unlock(&sp->so_delegreturn_mutex);
 				goto out_restart;
+			}
+			mutex_unlock(&sp->so_delegreturn_mutex);
 		}
 		if (data->arg.new_lock_owner != 0) {
 			nfs_confirm_seqid(&lsp->ls_seqid, 0);
@@ -7597,11 +7605,14 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 	int status;
 
 	request->c.flc_flags |= FL_ACCESS;
-	status = locks_lock_inode_wait(state->inode, request);
-	if (status < 0)
-		goto out;
 	mutex_lock(&sp->so_delegreturn_mutex);
 	down_read(&nfsi->rwsem);
+	status = locks_lock_inode_wait(state->inode, request);
+	if (status < 0) {
+		up_read(&nfsi->rwsem);
+		mutex_unlock(&sp->so_delegreturn_mutex);
+		goto out;
+	}
 	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
 		/* Yes: cache locks! */
 		/* ...but avoid races with delegation recall... */
-- 
2.31.1

Re: [PATCH] nfs: fix the race of lock/unlock and open

Posted by yangerkun 7 months ago

nfs4_reclaim_locks already given us solution:

static int nfs4_reclaim_locks(struct nfs4_state *state, const struct 
nfs4_state_recovery_ops *ops)
...
	/* Guard against delegation returns and new lock/unlock calls */
	down_write(&nfsi->rwsem);
	spin_lock(&flctx->flc_lock);

Can you help try this way?

在 2025/4/19 16:57, Li Lingfeng 写道:
> LOCK may extend an existing lock and release another one and UNLOCK may
> also release an existing lock.
> When opening a file, there may be access to file locks that have been
> concurrently released by lock/unlock operations, potentially triggering
> UAF.
> While certain concurrent scenarios involving lock/unlock and open
> operations have been safeguarded with locks – for example,
> nfs4_proc_unlckz() acquires the so_delegreturn_mutex prior to invoking
> locks_lock_inode_wait() – there remain cases where such protection is not
> yet implemented.
> 
> The issue can be reproduced through the following steps:
> T1: open in read-only mode with three consecutive lock operations applied
>      lock1(0~100) --> add lock1 to file
>      lock2(120~200) --> add lock2 to file
>      lock3(50~150) --> extend lock1 to cover range 0~200 and release lock2
> T2: restart nfs-server and run state manager
> T3: open in write-only mode
>      T1                            T2                                T3
>                              start recover
> lock1
> lock2
>                              nfs4_open_reclaim
>                              clear_bit // NFS_DELEGATED_STATE
> lock3
>   _nfs4_proc_setlk
>    lock so_delegreturn_mutex
>    unlock so_delegreturn_mutex
>    _nfs4_do_setlk
>                              recover done
>                                                  lock so_delegreturn_mutex
>                                                  nfs_delegation_claim_locks
>                                                  get lock2
>     rpc_run_task
>     ...
>     nfs4_lock_done
>      locks_lock_inode_wait
>      ...
>       locks_dispose_list
>       free lock2
>                                                  use lock2
>                                                  // UAF
>                                                  unlock so_delegreturn_mutex
> 
> Get so_delegreturn_mutex before calling locks_lock_inode_wait to fix this
> issue.
> 
> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
> ---
>   fs/nfs/nfs4proc.c | 19 +++++++++++++++----
>   1 file changed, 15 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 970f28dbf253..297ee2442c02 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -7112,13 +7112,16 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
>   		.inode = calldata->lsp->ls_state->inode,
>   		.stateid = &calldata->arg.stateid,
>   	};
> +	struct nfs4_state_owner *sp = calldata->ctx->state->owner;
>   
>   	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
>   		return;
>   	switch (task->tk_status) {
>   		case 0:
>   			renew_lease(calldata->server, calldata->timestamp);
> +			mutex_lock(&sp->so_delegreturn_mutex);
>   			locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
> +			mutex_unlock(&sp->so_delegreturn_mutex);
>   			if (nfs4_update_lock_stateid(calldata->lsp,
>   					&calldata->res.stateid))
>   				break;
> @@ -7375,6 +7378,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
>   {
>   	struct nfs4_lockdata *data = calldata;
>   	struct nfs4_lock_state *lsp = data->lsp;
> +	struct nfs4_state_owner *sp = data->ctx->state->owner;
>   
>   	if (!nfs4_sequence_done(task, &data->res.seq_res))
>   		return;
> @@ -7386,8 +7390,12 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
>   				data->timestamp);
>   		if (data->arg.new_lock && !data->cancelled) {
>   			data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
> -			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
> +			mutex_lock(&sp->so_delegreturn_mutex);
> +			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) {
> +				mutex_unlock(&sp->so_delegreturn_mutex);
>   				goto out_restart;
> +			}
> +			mutex_unlock(&sp->so_delegreturn_mutex);
>   		}
>   		if (data->arg.new_lock_owner != 0) {
>   			nfs_confirm_seqid(&lsp->ls_seqid, 0);
> @@ -7597,11 +7605,14 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
>   	int status;
>   
>   	request->c.flc_flags |= FL_ACCESS;
> -	status = locks_lock_inode_wait(state->inode, request);
> -	if (status < 0)
> -		goto out;
>   	mutex_lock(&sp->so_delegreturn_mutex);
>   	down_read(&nfsi->rwsem);
> +	status = locks_lock_inode_wait(state->inode, request);
> +	if (status < 0) {
> +		up_read(&nfsi->rwsem);
> +		mutex_unlock(&sp->so_delegreturn_mutex);
> +		goto out;
> +	}
>   	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
>   		/* Yes: cache locks! */
>   		/* ...but avoid races with delegation recall... */

Re: [PATCH] nfs: fix the race of lock/unlock and open

Posted by Li Lingfeng 8 months, 1 week ago

Friendly ping..

Thanks

在 2025/4/19 16:57, Li Lingfeng 写道:
> LOCK may extend an existing lock and release another one and UNLOCK may
> also release an existing lock.
> When opening a file, there may be access to file locks that have been
> concurrently released by lock/unlock operations, potentially triggering
> UAF.
> While certain concurrent scenarios involving lock/unlock and open
> operations have been safeguarded with locks – for example,
> nfs4_proc_unlckz() acquires the so_delegreturn_mutex prior to invoking
> locks_lock_inode_wait() – there remain cases where such protection is not
> yet implemented.
>
> The issue can be reproduced through the following steps:
> T1: open in read-only mode with three consecutive lock operations applied
>      lock1(0~100) --> add lock1 to file
>      lock2(120~200) --> add lock2 to file
>      lock3(50~150) --> extend lock1 to cover range 0~200 and release lock2
> T2: restart nfs-server and run state manager
> T3: open in write-only mode
>      T1                            T2                                T3
>                              start recover
> lock1
> lock2
>                              nfs4_open_reclaim
>                              clear_bit // NFS_DELEGATED_STATE
> lock3
>   _nfs4_proc_setlk
>    lock so_delegreturn_mutex
>    unlock so_delegreturn_mutex
>    _nfs4_do_setlk
>                              recover done
>                                                  lock so_delegreturn_mutex
>                                                  nfs_delegation_claim_locks
>                                                  get lock2
>     rpc_run_task
>     ...
>     nfs4_lock_done
>      locks_lock_inode_wait
>      ...
>       locks_dispose_list
>       free lock2
>                                                  use lock2
>                                                  // UAF
>                                                  unlock so_delegreturn_mutex
>
> Get so_delegreturn_mutex before calling locks_lock_inode_wait to fix this
> issue.
>
> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
> ---
>   fs/nfs/nfs4proc.c | 19 +++++++++++++++----
>   1 file changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 970f28dbf253..297ee2442c02 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -7112,13 +7112,16 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
>   		.inode = calldata->lsp->ls_state->inode,
>   		.stateid = &calldata->arg.stateid,
>   	};
> +	struct nfs4_state_owner *sp = calldata->ctx->state->owner;
>   
>   	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
>   		return;
>   	switch (task->tk_status) {
>   		case 0:
>   			renew_lease(calldata->server, calldata->timestamp);
> +			mutex_lock(&sp->so_delegreturn_mutex);
>   			locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
> +			mutex_unlock(&sp->so_delegreturn_mutex);
>   			if (nfs4_update_lock_stateid(calldata->lsp,
>   					&calldata->res.stateid))
>   				break;
> @@ -7375,6 +7378,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
>   {
>   	struct nfs4_lockdata *data = calldata;
>   	struct nfs4_lock_state *lsp = data->lsp;
> +	struct nfs4_state_owner *sp = data->ctx->state->owner;
>   
>   	if (!nfs4_sequence_done(task, &data->res.seq_res))
>   		return;
> @@ -7386,8 +7390,12 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
>   				data->timestamp);
>   		if (data->arg.new_lock && !data->cancelled) {
>   			data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
> -			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
> +			mutex_lock(&sp->so_delegreturn_mutex);
> +			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) {
> +				mutex_unlock(&sp->so_delegreturn_mutex);
>   				goto out_restart;
> +			}
> +			mutex_unlock(&sp->so_delegreturn_mutex);
>   		}
>   		if (data->arg.new_lock_owner != 0) {
>   			nfs_confirm_seqid(&lsp->ls_seqid, 0);
> @@ -7597,11 +7605,14 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
>   	int status;
>   
>   	request->c.flc_flags |= FL_ACCESS;
> -	status = locks_lock_inode_wait(state->inode, request);
> -	if (status < 0)
> -		goto out;
>   	mutex_lock(&sp->so_delegreturn_mutex);
>   	down_read(&nfsi->rwsem);
> +	status = locks_lock_inode_wait(state->inode, request);
> +	if (status < 0) {
> +		up_read(&nfsi->rwsem);
> +		mutex_unlock(&sp->so_delegreturn_mutex);
> +		goto out;
> +	}
>   	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
>   		/* Yes: cache locks! */
>   		/* ...but avoid races with delegation recall... */

Re: [PATCH] nfs: fix the race of lock/unlock and open

Posted by Li Lingfeng 7 months, 2 weeks ago

Ping again...

Thanks.

在 2025/6/5 14:51, Li Lingfeng 写道:
> Friendly ping..
>
> Thanks
>
> 在 2025/4/19 16:57, Li Lingfeng 写道:
>> LOCK may extend an existing lock and release another one and UNLOCK may
>> also release an existing lock.
>> When opening a file, there may be access to file locks that have been
>> concurrently released by lock/unlock operations, potentially triggering
>> UAF.
>> While certain concurrent scenarios involving lock/unlock and open
>> operations have been safeguarded with locks – for example,
>> nfs4_proc_unlckz() acquires the so_delegreturn_mutex prior to invoking
>> locks_lock_inode_wait() – there remain cases where such protection is 
>> not
>> yet implemented.
>>
>> The issue can be reproduced through the following steps:
>> T1: open in read-only mode with three consecutive lock operations 
>> applied
>>      lock1(0~100) --> add lock1 to file
>>      lock2(120~200) --> add lock2 to file
>>      lock3(50~150) --> extend lock1 to cover range 0~200 and release 
>> lock2
>> T2: restart nfs-server and run state manager
>> T3: open in write-only mode
>>      T1 T2                                T3
>>                              start recover
>> lock1
>> lock2
>>                              nfs4_open_reclaim
>>                              clear_bit // NFS_DELEGATED_STATE
>> lock3
>>   _nfs4_proc_setlk
>>    lock so_delegreturn_mutex
>>    unlock so_delegreturn_mutex
>>    _nfs4_do_setlk
>>                              recover done
>>                                                  lock 
>> so_delegreturn_mutex
>> nfs_delegation_claim_locks
>>                                                  get lock2
>>     rpc_run_task
>>     ...
>>     nfs4_lock_done
>>      locks_lock_inode_wait
>>      ...
>>       locks_dispose_list
>>       free lock2
>>                                                  use lock2
>>                                                  // UAF
>>                                                  unlock 
>> so_delegreturn_mutex
>>
>> Get so_delegreturn_mutex before calling locks_lock_inode_wait to fix 
>> this
>> issue.
>>
>> Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be 
>> atomic with the stateid update")
>> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com>
>> ---
>>   fs/nfs/nfs4proc.c | 19 +++++++++++++++----
>>   1 file changed, 15 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index 970f28dbf253..297ee2442c02 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -7112,13 +7112,16 @@ static void nfs4_locku_done(struct rpc_task 
>> *task, void *data)
>>           .inode = calldata->lsp->ls_state->inode,
>>           .stateid = &calldata->arg.stateid,
>>       };
>> +    struct nfs4_state_owner *sp = calldata->ctx->state->owner;
>>         if (!nfs4_sequence_done(task, &calldata->res.seq_res))
>>           return;
>>       switch (task->tk_status) {
>>           case 0:
>>               renew_lease(calldata->server, calldata->timestamp);
>> +            mutex_lock(&sp->so_delegreturn_mutex);
>> locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
>> +            mutex_unlock(&sp->so_delegreturn_mutex);
>>               if (nfs4_update_lock_stateid(calldata->lsp,
>>                       &calldata->res.stateid))
>>                   break;
>> @@ -7375,6 +7378,7 @@ static void nfs4_lock_done(struct rpc_task 
>> *task, void *calldata)
>>   {
>>       struct nfs4_lockdata *data = calldata;
>>       struct nfs4_lock_state *lsp = data->lsp;
>> +    struct nfs4_state_owner *sp = data->ctx->state->owner;
>>         if (!nfs4_sequence_done(task, &data->res.seq_res))
>>           return;
>> @@ -7386,8 +7390,12 @@ static void nfs4_lock_done(struct rpc_task 
>> *task, void *calldata)
>>                   data->timestamp);
>>           if (data->arg.new_lock && !data->cancelled) {
>>               data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
>> -            if (locks_lock_inode_wait(lsp->ls_state->inode, 
>> &data->fl) < 0)
>> +            mutex_lock(&sp->so_delegreturn_mutex);
>> +            if (locks_lock_inode_wait(lsp->ls_state->inode, 
>> &data->fl) < 0) {
>> +                mutex_unlock(&sp->so_delegreturn_mutex);
>>                   goto out_restart;
>> +            }
>> +            mutex_unlock(&sp->so_delegreturn_mutex);
>>           }
>>           if (data->arg.new_lock_owner != 0) {
>>               nfs_confirm_seqid(&lsp->ls_seqid, 0);
>> @@ -7597,11 +7605,14 @@ static int _nfs4_proc_setlk(struct nfs4_state 
>> *state, int cmd, struct file_lock
>>       int status;
>>         request->c.flc_flags |= FL_ACCESS;
>> -    status = locks_lock_inode_wait(state->inode, request);
>> -    if (status < 0)
>> -        goto out;
>>       mutex_lock(&sp->so_delegreturn_mutex);
>>       down_read(&nfsi->rwsem);
>> +    status = locks_lock_inode_wait(state->inode, request);
>> +    if (status < 0) {
>> +        up_read(&nfsi->rwsem);
>> +        mutex_unlock(&sp->so_delegreturn_mutex);
>> +        goto out;
>> +    }
>>       if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
>>           /* Yes: cache locks! */
>>           /* ...but avoid races with delegation recall... */