[v3] fs/file.c: optimize the critical section of file_lock in

[PATCH v3 1/3] fs/file.c: remove sanity_check and add likely/unlikely in alloc_fd()

Posted by Yu Ma 1 year, 7 months ago

alloc_fd() has a sanity check inside to make sure the struct file mapping to the
allocated fd is NULL. Remove this sanity check since it can be assured by
exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
likely/unlikely and expand_file() call avoidance to reduce the work under
file_lock.

Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Yu Ma <yu.ma@intel.com>
---
 fs/file.c | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index a3b72aa64f11..5178b246e54b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
 	if (fd < files->next_fd)
 		fd = files->next_fd;
 
-	if (fd < fdt->max_fds)
+	if (likely(fd < fdt->max_fds))
 		fd = find_next_fd(fdt, fd);
 
+	error = -EMFILE;
+	if (unlikely(fd >= fdt->max_fds)) {
+		error = expand_files(files, fd);
+		if (error < 0)
+			goto out;
+		/*
+		 * If we needed to expand the fs array we
+		 * might have blocked - try again.
+		 */
+		if (error)
+			goto repeat;
+	}
+
 	/*
 	 * N.B. For clone tasks sharing a files structure, this test
 	 * will limit the total number of files that can be opened.
 	 */
-	error = -EMFILE;
-	if (fd >= end)
-		goto out;
-
-	error = expand_files(files, fd);
-	if (error < 0)
+	if (unlikely(fd >= end))
 		goto out;
 
-	/*
-	 * If we needed to expand the fs array we
-	 * might have blocked - try again.
-	 */
-	if (error)
-		goto repeat;
-
 	if (start <= files->next_fd)
 		files->next_fd = fd + 1;
 
@@ -546,13 +547,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
 	else
 		__clear_close_on_exec(fd, fdt);
 	error = fd;
-#if 1
-	/* Sanity check */
-	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
-		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
-		rcu_assign_pointer(fdt->fd[fd], NULL);
-	}
-#endif
 
 out:
 	spin_unlock(&files->file_lock);
@@ -618,7 +612,7 @@ void fd_install(unsigned int fd, struct file *file)
 		rcu_read_unlock_sched();
 		spin_lock(&files->file_lock);
 		fdt = files_fdtable(files);
-		BUG_ON(fdt->fd[fd] != NULL);
+		WARN_ON(fdt->fd[fd] != NULL);
 		rcu_assign_pointer(fdt->fd[fd], file);
 		spin_unlock(&files->file_lock);
 		return;
-- 
2.43.0

Re: [PATCH v3 1/3] fs/file.c: remove sanity_check and add likely/unlikely in alloc_fd()

Posted by Christian Brauner 1 year, 7 months ago

On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote:
> alloc_fd() has a sanity check inside to make sure the struct file mapping to the
> allocated fd is NULL. Remove this sanity check since it can be assured by
> exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
> likely/unlikely and expand_file() call avoidance to reduce the work under
> file_lock.
> 
> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> Signed-off-by: Yu Ma <yu.ma@intel.com>
> ---
>  fs/file.c | 38 ++++++++++++++++----------------------
>  1 file changed, 16 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/file.c b/fs/file.c
> index a3b72aa64f11..5178b246e54b 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
>  	if (fd < files->next_fd)
>  		fd = files->next_fd;
>  
> -	if (fd < fdt->max_fds)
> +	if (likely(fd < fdt->max_fds))
>  		fd = find_next_fd(fdt, fd);
>  
> +	error = -EMFILE;
> +	if (unlikely(fd >= fdt->max_fds)) {
> +		error = expand_files(files, fd);
> +		if (error < 0)
> +			goto out;
> +		/*
> +		 * If we needed to expand the fs array we
> +		 * might have blocked - try again.
> +		 */
> +		if (error)
> +			goto repeat;
> +	}

So this ends up removing the expand_files() above the fd >= end check
which means that you can end up expanding the files_struct even though
the request fd is past the provided end. That seems odd. What's the
reason for that reordering?

> +
>  	/*
>  	 * N.B. For clone tasks sharing a files structure, this test
>  	 * will limit the total number of files that can be opened.
>  	 */
> -	error = -EMFILE;
> -	if (fd >= end)
> -		goto out;
> -
> -	error = expand_files(files, fd);
> -	if (error < 0)
> +	if (unlikely(fd >= end))
>  		goto out;
>  
> -	/*
> -	 * If we needed to expand the fs array we
> -	 * might have blocked - try again.
> -	 */
> -	if (error)
> -		goto repeat;
> -
>  	if (start <= files->next_fd)
>  		files->next_fd = fd + 1;
>  
> @@ -546,13 +547,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
>  	else
>  		__clear_close_on_exec(fd, fdt);
>  	error = fd;
> -#if 1
> -	/* Sanity check */
> -	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
> -		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
> -		rcu_assign_pointer(fdt->fd[fd], NULL);
> -	}
> -#endif
>  
>  out:
>  	spin_unlock(&files->file_lock);
> @@ -618,7 +612,7 @@ void fd_install(unsigned int fd, struct file *file)
>  		rcu_read_unlock_sched();
>  		spin_lock(&files->file_lock);
>  		fdt = files_fdtable(files);
> -		BUG_ON(fdt->fd[fd] != NULL);
> +		WARN_ON(fdt->fd[fd] != NULL);
>  		rcu_assign_pointer(fdt->fd[fd], file);
>  		spin_unlock(&files->file_lock);
>  		return;
> -- 
> 2.43.0
>

Re: [PATCH v3 1/3] fs/file.c: remove sanity_check and add likely/unlikely in alloc_fd()

Posted by Jan Kara 1 year, 7 months ago

On Wed 03-07-24 16:34:49, Christian Brauner wrote:
> On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote:
> > alloc_fd() has a sanity check inside to make sure the struct file mapping to the
> > allocated fd is NULL. Remove this sanity check since it can be assured by
> > exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
> > likely/unlikely and expand_file() call avoidance to reduce the work under
> > file_lock.
> > 
> > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> > Signed-off-by: Yu Ma <yu.ma@intel.com>
> > ---
> >  fs/file.c | 38 ++++++++++++++++----------------------
> >  1 file changed, 16 insertions(+), 22 deletions(-)
> > 
> > diff --git a/fs/file.c b/fs/file.c
> > index a3b72aa64f11..5178b246e54b 100644
> > --- a/fs/file.c
> > +++ b/fs/file.c
> > @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
> >  	if (fd < files->next_fd)
> >  		fd = files->next_fd;
> >  
> > -	if (fd < fdt->max_fds)
> > +	if (likely(fd < fdt->max_fds))
> >  		fd = find_next_fd(fdt, fd);
> >  
> > +	error = -EMFILE;
> > +	if (unlikely(fd >= fdt->max_fds)) {
> > +		error = expand_files(files, fd);
> > +		if (error < 0)
> > +			goto out;
> > +		/*
> > +		 * If we needed to expand the fs array we
> > +		 * might have blocked - try again.
> > +		 */
> > +		if (error)
> > +			goto repeat;
> > +	}
> 
> So this ends up removing the expand_files() above the fd >= end check
> which means that you can end up expanding the files_struct even though
> the request fd is past the provided end. That seems odd. What's the
> reason for that reordering?

Yeah, not only that but also:

> >  	/*
> >  	 * N.B. For clone tasks sharing a files structure, this test
> >  	 * will limit the total number of files that can be opened.
> >  	 */
> > -	error = -EMFILE;
> > -	if (fd >= end)
> > -		goto out;
> > -
> > -	error = expand_files(files, fd);
> > -	if (error < 0)
> > +	if (unlikely(fd >= end))
> >  		goto out;

We could then exit here with error set to 0 instead of -EMFILE. So this
needs a bit of work. But otherwise the patch looks good to me.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

Re: [PATCH v3 1/3] fs/file.c: remove sanity_check and add likely/unlikely in alloc_fd()

Posted by Ma, Yu 1 year, 7 months ago

On 7/4/2024 6:11 PM, Jan Kara wrote:
> On Wed 03-07-24 16:34:49, Christian Brauner wrote:
>> On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote:
>>> alloc_fd() has a sanity check inside to make sure the struct file mapping to the
>>> allocated fd is NULL. Remove this sanity check since it can be assured by
>>> exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
>>> likely/unlikely and expand_file() call avoidance to reduce the work under
>>> file_lock.
>>>
>>> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
>>> Signed-off-by: Yu Ma <yu.ma@intel.com>
>>> ---
>>>   fs/file.c | 38 ++++++++++++++++----------------------
>>>   1 file changed, 16 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/fs/file.c b/fs/file.c
>>> index a3b72aa64f11..5178b246e54b 100644
>>> --- a/fs/file.c
>>> +++ b/fs/file.c
>>> @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
>>>   	if (fd < files->next_fd)
>>>   		fd = files->next_fd;
>>>   
>>> -	if (fd < fdt->max_fds)
>>> +	if (likely(fd < fdt->max_fds))
>>>   		fd = find_next_fd(fdt, fd);
>>>   
>>> +	error = -EMFILE;
>>> +	if (unlikely(fd >= fdt->max_fds)) {
>>> +		error = expand_files(files, fd);
>>> +		if (error < 0)
>>> +			goto out;
>>> +		/*
>>> +		 * If we needed to expand the fs array we
>>> +		 * might have blocked - try again.
>>> +		 */
>>> +		if (error)
>>> +			goto repeat;
>>> +	}
>> So this ends up removing the expand_files() above the fd >= end check
>> which means that you can end up expanding the files_struct even though
>> the request fd is past the provided end. That seems odd. What's the
>> reason for that reordering?
> Yeah, not only that but also:
>
>>>   	/*
>>>   	 * N.B. For clone tasks sharing a files structure, this test
>>>   	 * will limit the total number of files that can be opened.
>>>   	 */
>>> -	error = -EMFILE;
>>> -	if (fd >= end)
>>> -		goto out;
>>> -
>>> -	error = expand_files(files, fd);
>>> -	if (error < 0)
>>> +	if (unlikely(fd >= end))
>>>   		goto out;
> We could then exit here with error set to 0 instead of -EMFILE. So this
> needs a bit of work. But otherwise the patch looks good to me.
>
> 								Honza

Do you mean that we return 0 here is fd >=end, I'm afraid that might 
broke the original design of this function. And all the callers of it 
are using ret < 0 for error handling, if ret=0, that should mean the fd 
allocated is 0 ...

Re: [PATCH v3 1/3] fs/file.c: remove sanity_check and add likely/unlikely in alloc_fd()

Posted by Jan Kara 1 year, 7 months ago

On Thu 04-07-24 22:45:32, Ma, Yu wrote:
> 
> On 7/4/2024 6:11 PM, Jan Kara wrote:
> > On Wed 03-07-24 16:34:49, Christian Brauner wrote:
> > > On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote:
> > > > alloc_fd() has a sanity check inside to make sure the struct file mapping to the
> > > > allocated fd is NULL. Remove this sanity check since it can be assured by
> > > > exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
> > > > likely/unlikely and expand_file() call avoidance to reduce the work under
> > > > file_lock.
> > > > 
> > > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> > > > Signed-off-by: Yu Ma <yu.ma@intel.com>
> > > > ---
> > > >   fs/file.c | 38 ++++++++++++++++----------------------
> > > >   1 file changed, 16 insertions(+), 22 deletions(-)
> > > > 
> > > > diff --git a/fs/file.c b/fs/file.c
> > > > index a3b72aa64f11..5178b246e54b 100644
> > > > --- a/fs/file.c
> > > > +++ b/fs/file.c
> > > > @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
> > > >   	if (fd < files->next_fd)
> > > >   		fd = files->next_fd;
> > > > -	if (fd < fdt->max_fds)
> > > > +	if (likely(fd < fdt->max_fds))
> > > >   		fd = find_next_fd(fdt, fd);
> > > > +	error = -EMFILE;
> > > > +	if (unlikely(fd >= fdt->max_fds)) {
> > > > +		error = expand_files(files, fd);
> > > > +		if (error < 0)
> > > > +			goto out;
> > > > +		/*
> > > > +		 * If we needed to expand the fs array we
> > > > +		 * might have blocked - try again.
> > > > +		 */
> > > > +		if (error)
> > > > +			goto repeat;
> > > > +	}
> > > So this ends up removing the expand_files() above the fd >= end check
> > > which means that you can end up expanding the files_struct even though
> > > the request fd is past the provided end. That seems odd. What's the
> > > reason for that reordering?
> > Yeah, not only that but also:
> > 
> > > >   	/*
> > > >   	 * N.B. For clone tasks sharing a files structure, this test
> > > >   	 * will limit the total number of files that can be opened.
> > > >   	 */
> > > > -	error = -EMFILE;
> > > > -	if (fd >= end)
> > > > -		goto out;
> > > > -
> > > > -	error = expand_files(files, fd);
> > > > -	if (error < 0)
> > > > +	if (unlikely(fd >= end))
> > > >   		goto out;
> > We could then exit here with error set to 0 instead of -EMFILE. So this
> > needs a bit of work. But otherwise the patch looks good to me.
> > 
> > 								Honza
> 
> Do you mean that we return 0 here is fd >=end, I'm afraid that might broke
> the original design of this function. And all the callers of it are using
> ret < 0 for error handling, if ret=0, that should mean the fd allocated is 0
> ...

What I meant is that after your changes alloc_fd() could return 0 in fd >=
end case. It could happen if we went through expand_files() which then
returned 0. Anyway, please just fix the ordering of checks and we should be
fine.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

Re: [PATCH v3 1/3] fs/file.c: remove sanity_check and add likely/unlikely in alloc_fd()

Posted by Ma, Yu 1 year, 7 months ago

On 7/3/2024 10:34 PM, Christian Brauner wrote:
> On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote:
>> alloc_fd() has a sanity check inside to make sure the struct file mapping to the
>> allocated fd is NULL. Remove this sanity check since it can be assured by
>> exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
>> likely/unlikely and expand_file() call avoidance to reduce the work under
>> file_lock.
>>
>> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
>> Signed-off-by: Yu Ma <yu.ma@intel.com>
>> ---
>>   fs/file.c | 38 ++++++++++++++++----------------------
>>   1 file changed, 16 insertions(+), 22 deletions(-)
>>
>> diff --git a/fs/file.c b/fs/file.c
>> index a3b72aa64f11..5178b246e54b 100644
>> --- a/fs/file.c
>> +++ b/fs/file.c
>> @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
>>   	if (fd < files->next_fd)
>>   		fd = files->next_fd;
>>   
>> -	if (fd < fdt->max_fds)
>> +	if (likely(fd < fdt->max_fds))
>>   		fd = find_next_fd(fdt, fd);
>>   
>> +	error = -EMFILE;
>> +	if (unlikely(fd >= fdt->max_fds)) {
>> +		error = expand_files(files, fd);
>> +		if (error < 0)
>> +			goto out;
>> +		/*
>> +		 * If we needed to expand the fs array we
>> +		 * might have blocked - try again.
>> +		 */
>> +		if (error)
>> +			goto repeat;
>> +	}
> So this ends up removing the expand_files() above the fd >= end check
> which means that you can end up expanding the files_struct even though
> the request fd is past the provided end. That seems odd. What's the
> reason for that reordering?

Yes, you are right, thanks Christian. This incorrect reordering here is 
due to historical versions with fast path inside. I'll update the order 
back.

>> +
>>   	/*
>>   	 * N.B. For clone tasks sharing a files structure, this test
>>   	 * will limit the total number of files that can be opened.
>>   	 */
>> -	error = -EMFILE;
>> -	if (fd >= end)
>> -		goto out;
>> -
>> -	error = expand_files(files, fd);
>> -	if (error < 0)
>> +	if (unlikely(fd >= end))
>>   		goto out;
>>   
>> -	/*
>> -	 * If we needed to expand the fs array we
>> -	 * might have blocked - try again.
>> -	 */
>> -	if (error)
>> -		goto repeat;
>> -
>>   	if (start <= files->next_fd)
>>   		files->next_fd = fd + 1;
>>   
>> @@ -546,13 +547,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
>>   	else
>>   		__clear_close_on_exec(fd, fdt);
>>   	error = fd;
>> -#if 1
>> -	/* Sanity check */
>> -	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
>> -		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
>> -		rcu_assign_pointer(fdt->fd[fd], NULL);
>> -	}
>> -#endif
>>   
>>   out:
>>   	spin_unlock(&files->file_lock);
>> @@ -618,7 +612,7 @@ void fd_install(unsigned int fd, struct file *file)
>>   		rcu_read_unlock_sched();
>>   		spin_lock(&files->file_lock);
>>   		fdt = files_fdtable(files);
>> -		BUG_ON(fdt->fd[fd] != NULL);
>> +		WARN_ON(fdt->fd[fd] != NULL);
>>   		rcu_assign_pointer(fdt->fd[fd], file);
>>   		spin_unlock(&files->file_lock);
>>   		return;
>> -- 
>> 2.43.0
>>