From: Baokun Li <libaokun1@huawei.com>
The following concurrency may cause a read request to fail to be completed
and result in a hang:

                t1                |                t2
---------------------------------------------------------
cachefiles_ondemand_copen
  req = xa_erase(&cache->reqs, id)
                                  // Anon fd is maliciously closed.
                                  cachefiles_ondemand_fd_release
                                    xa_lock(&cache->reqs)
                                    cachefiles_ondemand_set_object_close(object)
                                    xa_unlock(&cache->reqs)
cachefiles_ondemand_set_object_open
  // No one will ever close it again.
                                  cachefiles_ondemand_daemon_read
                                    cachefiles_ondemand_select_req
                                    // Get a read req but its fd is already closed.
                                    // The daemon can't issue a cread ioctl with a
                                    // closed fd, so the read request hangs.

So add a spin_lock to cachefiles_ondemand_info to protect ondemand_id and
state, so that the above problem can be avoided in cachefiles_ondemand_copen()
by using ondemand_id to determine whether the fd has been released.
Signed-off-by: Baokun Li <libaokun1@huawei.com>
---
fs/cachefiles/internal.h | 1 +
fs/cachefiles/ondemand.c | 16 +++++++++++++++-
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 7745b8abc3aa..45c8bed60538 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -55,6 +55,7 @@ struct cachefiles_ondemand_info {
int ondemand_id;
enum cachefiles_object_state state;
struct cachefiles_object *object;
+ spinlock_t lock;
};
/*
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index 898fab68332b..b5e6a851ef04 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -16,13 +16,16 @@ static int cachefiles_ondemand_fd_release(struct inode *inode,
struct cachefiles_object *object = file->private_data;
struct cachefiles_cache *cache = object->volume->cache;
struct cachefiles_ondemand_info *info = object->ondemand;
- int object_id = info->ondemand_id;
+ int object_id;
struct cachefiles_req *req;
XA_STATE(xas, &cache->reqs, 0);
xa_lock(&cache->reqs);
+ spin_lock(&info->lock);
+ object_id = info->ondemand_id;
info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED;
cachefiles_ondemand_set_object_close(object);
+ spin_unlock(&info->lock);
/* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */
xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
@@ -127,6 +130,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
{
struct cachefiles_req *req;
struct fscache_cookie *cookie;
+ struct cachefiles_ondemand_info *info;
char *pid, *psize;
unsigned long id;
long size;
@@ -185,6 +189,14 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
goto out;
}
+ info = req->object->ondemand;
+ spin_lock(&info->lock);
+ /* The anonymous fd was closed before copen ? */
+ if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) {
+ spin_unlock(&info->lock);
+ req->error = -EBADFD;
+ goto out;
+ }
cookie = req->object->cookie;
cookie->object_size = size;
if (size)
@@ -194,6 +206,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
trace_cachefiles_ondemand_copen(req->object, id, size);
cachefiles_ondemand_set_object_open(req->object);
+ spin_unlock(&info->lock);
wake_up_all(&cache->daemon_pollwq);
out:
@@ -596,6 +609,7 @@ int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
return -ENOMEM;
object->ondemand->object = object;
+ spin_lock_init(&object->ondemand->lock);
INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker);
return 0;
}
--
2.39.2
On 4/24/24 11:39 AM, libaokun@huaweicloud.com wrote:
> From: Baokun Li <libaokun1@huawei.com>
>
> The following concurrency may cause a read request to fail to be completed
> and result in a hang:
>
>                 t1                |                t2
> ---------------------------------------------------------
> cachefiles_ondemand_copen
>   req = xa_erase(&cache->reqs, id)
>                                   // Anon fd is maliciously closed.
>                                   cachefiles_ondemand_fd_release
>                                     xa_lock(&cache->reqs)
>                                     cachefiles_ondemand_set_object_close(object)
>                                     xa_unlock(&cache->reqs)
> cachefiles_ondemand_set_object_open
>   // No one will ever close it again.
>                                   cachefiles_ondemand_daemon_read
>                                     cachefiles_ondemand_select_req
>                                     // Get a read req but its fd is already closed.
>                                     // The daemon can't issue a cread ioctl with a
>                                     // closed fd, so the read request hangs.
>
> So add a spin_lock to cachefiles_ondemand_info to protect ondemand_id and
> state, so that the above problem can be avoided in cachefiles_ondemand_copen()
> by using ondemand_id to determine whether the fd has been released.
>
> Signed-off-by: Baokun Li <libaokun1@huawei.com>
This indeed looks like a reasonable scenario that the kernel side should
fix, as a non-malicious daemon could also run into it.

How about reusing the &cache->reqs spinlock rather than introducing a new
spinlock, since the &cache->reqs spinlock is already held when setting the
object to the close state in cachefiles_ondemand_fd_release()?
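For illustration only, an untested sketch of that alternative in
cachefiles_ondemand_copen(), using the same check and names as in this
patch but taken under xa_lock(&cache->reqs) instead of a new per-object
lock:

	xa_lock(&cache->reqs);
	if (req->object->ondemand->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) {
		xa_unlock(&cache->reqs);
		req->error = -EBADFD;
		goto out;
	}
	/* the existing cookie resize and trace calls would stay here */
	cachefiles_ondemand_set_object_open(req->object);
	xa_unlock(&cache->reqs);

That would serialize copen against the close path with the lock
cachefiles_ondemand_fd_release() already holds, at the cost of holding the
xarray lock slightly longer in copen.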
> ---
> fs/cachefiles/internal.h | 1 +
> fs/cachefiles/ondemand.c | 16 +++++++++++++++-
> 2 files changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
> index 7745b8abc3aa..45c8bed60538 100644
> --- a/fs/cachefiles/internal.h
> +++ b/fs/cachefiles/internal.h
> @@ -55,6 +55,7 @@ struct cachefiles_ondemand_info {
> int ondemand_id;
> enum cachefiles_object_state state;
> struct cachefiles_object *object;
> + spinlock_t lock;
> };
>
> /*
> diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
> index 898fab68332b..b5e6a851ef04 100644
> --- a/fs/cachefiles/ondemand.c
> +++ b/fs/cachefiles/ondemand.c
> @@ -16,13 +16,16 @@ static int cachefiles_ondemand_fd_release(struct inode *inode,
> struct cachefiles_object *object = file->private_data;
> struct cachefiles_cache *cache = object->volume->cache;
> struct cachefiles_ondemand_info *info = object->ondemand;
> - int object_id = info->ondemand_id;
> + int object_id;
> struct cachefiles_req *req;
> XA_STATE(xas, &cache->reqs, 0);
>
> xa_lock(&cache->reqs);
> + spin_lock(&info->lock);
> + object_id = info->ondemand_id;
> info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED;
> cachefiles_ondemand_set_object_close(object);
> + spin_unlock(&info->lock);
>
> /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */
> xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
> @@ -127,6 +130,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
> {
> struct cachefiles_req *req;
> struct fscache_cookie *cookie;
> + struct cachefiles_ondemand_info *info;
> char *pid, *psize;
> unsigned long id;
> long size;
> @@ -185,6 +189,14 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
> goto out;
> }
>
> + info = req->object->ondemand;
> + spin_lock(&info->lock);
> + /* The anonymous fd was closed before copen ? */
I would like to see more details in the comment, e.g. put the time
sequence described in the commit message here.
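Something along these lines, for example (just a sketch that reuses the
sequence from the commit message):

	/*
	 * The anonymous fd was closed by the daemon before copen, e.g.:
	 *
	 *	copen				close fd
	 *	  xa_erase(&cache->reqs, id)
	 *					  set object closed
	 *	  set object open
	 *
	 * Reject the copen here, otherwise the object is left open with
	 * no anonymous fd, and no one will ever close it again.
	 */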
> + if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) {
> + spin_unlock(&info->lock);
> + req->error = -EBADFD;
> + goto out;
> + }
> cookie = req->object->cookie;
> cookie->object_size = size;
> if (size)
> @@ -194,6 +206,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
> trace_cachefiles_ondemand_copen(req->object, id, size);
>
> cachefiles_ondemand_set_object_open(req->object);
> + spin_unlock(&info->lock);
> wake_up_all(&cache->daemon_pollwq);
>
> out:
> @@ -596,6 +609,7 @@ int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
> return -ENOMEM;
>
> object->ondemand->object = object;
> + spin_lock_init(&object->ondemand->lock);
> INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker);
> return 0;
> }
--
Thanks,
Jingbo
On 2024/5/6 10:55, Jingbo Xu wrote:
>
> On 4/24/24 11:39 AM, libaokun@huaweicloud.com wrote:
>> From: Baokun Li <libaokun1@huawei.com>
>>
>> The following concurrency may cause a read request to fail to be completed
>> and result in a hang:
>>
>>                 t1                |                t2
>> ---------------------------------------------------------
>> cachefiles_ondemand_copen
>>   req = xa_erase(&cache->reqs, id)
>>                                   // Anon fd is maliciously closed.
>>                                   cachefiles_ondemand_fd_release
>>                                     xa_lock(&cache->reqs)
>>                                     cachefiles_ondemand_set_object_close(object)
>>                                     xa_unlock(&cache->reqs)
>> cachefiles_ondemand_set_object_open
>>   // No one will ever close it again.
>>                                   cachefiles_ondemand_daemon_read
>>                                     cachefiles_ondemand_select_req
>>                                     // Get a read req but its fd is already closed.
>>                                     // The daemon can't issue a cread ioctl with a
>>                                     // closed fd, so the read request hangs.
>>
>> So add a spin_lock to cachefiles_ondemand_info to protect ondemand_id and
>> state, so that the above problem can be avoided in cachefiles_ondemand_copen()
>> by using ondemand_id to determine whether the fd has been released.
>>
>> Signed-off-by: Baokun Li <libaokun1@huawei.com>
> This indeed looks like a reasonable scenario that the kernel side should
> fix, as a non-malicious daemon could also run into it.
>
> How about reusing the &cache->reqs spinlock rather than introducing a new
> spinlock, since the &cache->reqs spinlock is already held when setting the
> object to the close state in cachefiles_ondemand_fd_release()?
We considered reusing the &cache->reqs spinlock before, but the two uses
don't exactly overlap, and there are upcoming patches that will use the
new spin_lock. In addition, a separate lock reduces contention on the
&cache->reqs spinlock.
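To make the resulting lock usage concrete, here is a simplified sketch of
what this patch does (not the full functions): the close path nests the
new per-object lock inside the xarray lock, while the new check in copen
only takes the per-object lock, so it adds no extra contention on
&cache->reqs:

	/* cachefiles_ondemand_fd_release(): close side */
	xa_lock(&cache->reqs);
	spin_lock(&info->lock);		/* new lock, nests inside xa_lock */
	object_id = info->ondemand_id;
	info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED;
	cachefiles_ondemand_set_object_close(object);
	spin_unlock(&info->lock);
	/* flush CACHEFILES_REQ_NEW requests, then xa_unlock(&cache->reqs) */

	/* cachefiles_ondemand_copen(): open side */
	spin_lock(&info->lock);		/* &cache->reqs is not needed here */
	if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) {
		spin_unlock(&info->lock);
		req->error = -EBADFD;
		goto out;
	}
	cachefiles_ondemand_set_object_open(req->object);
	spin_unlock(&info->lock);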
>> ---
>> fs/cachefiles/internal.h | 1 +
>> fs/cachefiles/ondemand.c | 16 +++++++++++++++-
>> 2 files changed, 16 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
>> index 7745b8abc3aa..45c8bed60538 100644
>> --- a/fs/cachefiles/internal.h
>> +++ b/fs/cachefiles/internal.h
>> @@ -55,6 +55,7 @@ struct cachefiles_ondemand_info {
>> int ondemand_id;
>> enum cachefiles_object_state state;
>> struct cachefiles_object *object;
>> + spinlock_t lock;
>> };
>>
>> /*
>> diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
>> index 898fab68332b..b5e6a851ef04 100644
>> --- a/fs/cachefiles/ondemand.c
>> +++ b/fs/cachefiles/ondemand.c
>> @@ -16,13 +16,16 @@ static int cachefiles_ondemand_fd_release(struct inode *inode,
>> struct cachefiles_object *object = file->private_data;
>> struct cachefiles_cache *cache = object->volume->cache;
>> struct cachefiles_ondemand_info *info = object->ondemand;
>> - int object_id = info->ondemand_id;
>> + int object_id;
>> struct cachefiles_req *req;
>> XA_STATE(xas, &cache->reqs, 0);
>>
>> xa_lock(&cache->reqs);
>> + spin_lock(&info->lock);
>> + object_id = info->ondemand_id;
>> info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED;
>> cachefiles_ondemand_set_object_close(object);
>> + spin_unlock(&info->lock);
>>
>> /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */
>> xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
>> @@ -127,6 +130,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
>> {
>> struct cachefiles_req *req;
>> struct fscache_cookie *cookie;
>> + struct cachefiles_ondemand_info *info;
>> char *pid, *psize;
>> unsigned long id;
>> long size;
>> @@ -185,6 +189,14 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
>> goto out;
>> }
>>
>> + info = req->object->ondemand;
>> + spin_lock(&info->lock);
>> + /* The anonymous fd was closed before copen ? */
> I would like to see more details in the comment, e.g. put the time
> sequence described in the commit message here.
OK, thanks for your suggestion. I will describe it in more detail
in the next revision.
Thanks,
Baokun
>
>> + if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) {
>> + spin_unlock(&info->lock);
>> + req->error = -EBADFD;
>> + goto out;
>> + }
>> cookie = req->object->cookie;
>> cookie->object_size = size;
>> if (size)
>> @@ -194,6 +206,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
>> trace_cachefiles_ondemand_copen(req->object, id, size);
>>
>> cachefiles_ondemand_set_object_open(req->object);
>> + spin_unlock(&info->lock);
>> wake_up_all(&cache->daemon_pollwq);
>>
>> out:
>> @@ -596,6 +609,7 @@ int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
>> return -ENOMEM;
>>
>> object->ondemand->object = object;
>> + spin_lock_init(&object->ondemand->lock);
>> INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker);
>> return 0;
>> }