alloc_fd() has a sanity check inside to make sure the struct file mapping to the
allocated fd is NULL. Remove this sanity check since it can be assured by
exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
likely/unlikely and expand_file() call avoidance to reduce the work under
file_lock.
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Yu Ma <yu.ma@intel.com>
---
fs/file.c | 38 ++++++++++++++++----------------------
1 file changed, 16 insertions(+), 22 deletions(-)
diff --git a/fs/file.c b/fs/file.c
index a3b72aa64f11..5178b246e54b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
if (fd < files->next_fd)
fd = files->next_fd;
- if (fd < fdt->max_fds)
+ if (likely(fd < fdt->max_fds))
fd = find_next_fd(fdt, fd);
+ error = -EMFILE;
+ if (unlikely(fd >= fdt->max_fds)) {
+ error = expand_files(files, fd);
+ if (error < 0)
+ goto out;
+ /*
+ * If we needed to expand the fs array we
+ * might have blocked - try again.
+ */
+ if (error)
+ goto repeat;
+ }
+
/*
* N.B. For clone tasks sharing a files structure, this test
* will limit the total number of files that can be opened.
*/
- error = -EMFILE;
- if (fd >= end)
- goto out;
-
- error = expand_files(files, fd);
- if (error < 0)
+ if (unlikely(fd >= end))
goto out;
- /*
- * If we needed to expand the fs array we
- * might have blocked - try again.
- */
- if (error)
- goto repeat;
-
if (start <= files->next_fd)
files->next_fd = fd + 1;
@@ -546,13 +547,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
else
__clear_close_on_exec(fd, fdt);
error = fd;
-#if 1
- /* Sanity check */
- if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
- printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
- rcu_assign_pointer(fdt->fd[fd], NULL);
- }
-#endif
out:
spin_unlock(&files->file_lock);
@@ -618,7 +612,7 @@ void fd_install(unsigned int fd, struct file *file)
rcu_read_unlock_sched();
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
- BUG_ON(fdt->fd[fd] != NULL);
+ WARN_ON(fdt->fd[fd] != NULL);
rcu_assign_pointer(fdt->fd[fd], file);
spin_unlock(&files->file_lock);
return;
--
2.43.0
On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote:
> alloc_fd() has a sanity check inside to make sure the struct file mapping to the
> allocated fd is NULL. Remove this sanity check since it can be assured by
> exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
> likely/unlikely and expand_file() call avoidance to reduce the work under
> file_lock.
>
> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> Signed-off-by: Yu Ma <yu.ma@intel.com>
> ---
> fs/file.c | 38 ++++++++++++++++----------------------
> 1 file changed, 16 insertions(+), 22 deletions(-)
>
> diff --git a/fs/file.c b/fs/file.c
> index a3b72aa64f11..5178b246e54b 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
> if (fd < files->next_fd)
> fd = files->next_fd;
>
> - if (fd < fdt->max_fds)
> + if (likely(fd < fdt->max_fds))
> fd = find_next_fd(fdt, fd);
>
> + error = -EMFILE;
> + if (unlikely(fd >= fdt->max_fds)) {
> + error = expand_files(files, fd);
> + if (error < 0)
> + goto out;
> + /*
> + * If we needed to expand the fs array we
> + * might have blocked - try again.
> + */
> + if (error)
> + goto repeat;
> + }
So this ends up removing the expand_files() above the fd >= end check
which means that you can end up expanding the files_struct even though
the request fd is past the provided end. That seems odd. What's the
reason for that reordering?
> +
> /*
> * N.B. For clone tasks sharing a files structure, this test
> * will limit the total number of files that can be opened.
> */
> - error = -EMFILE;
> - if (fd >= end)
> - goto out;
> -
> - error = expand_files(files, fd);
> - if (error < 0)
> + if (unlikely(fd >= end))
> goto out;
>
> - /*
> - * If we needed to expand the fs array we
> - * might have blocked - try again.
> - */
> - if (error)
> - goto repeat;
> -
> if (start <= files->next_fd)
> files->next_fd = fd + 1;
>
> @@ -546,13 +547,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
> else
> __clear_close_on_exec(fd, fdt);
> error = fd;
> -#if 1
> - /* Sanity check */
> - if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
> - printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
> - rcu_assign_pointer(fdt->fd[fd], NULL);
> - }
> -#endif
>
> out:
> spin_unlock(&files->file_lock);
> @@ -618,7 +612,7 @@ void fd_install(unsigned int fd, struct file *file)
> rcu_read_unlock_sched();
> spin_lock(&files->file_lock);
> fdt = files_fdtable(files);
> - BUG_ON(fdt->fd[fd] != NULL);
> + WARN_ON(fdt->fd[fd] != NULL);
> rcu_assign_pointer(fdt->fd[fd], file);
> spin_unlock(&files->file_lock);
> return;
> --
> 2.43.0
>
On Wed 03-07-24 16:34:49, Christian Brauner wrote:
> On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote:
> > alloc_fd() has a sanity check inside to make sure the struct file mapping to the
> > allocated fd is NULL. Remove this sanity check since it can be assured by
> > exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
> > likely/unlikely and expand_file() call avoidance to reduce the work under
> > file_lock.
> >
> > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> > Signed-off-by: Yu Ma <yu.ma@intel.com>
> > ---
> > fs/file.c | 38 ++++++++++++++++----------------------
> > 1 file changed, 16 insertions(+), 22 deletions(-)
> >
> > diff --git a/fs/file.c b/fs/file.c
> > index a3b72aa64f11..5178b246e54b 100644
> > --- a/fs/file.c
> > +++ b/fs/file.c
> > @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
> > if (fd < files->next_fd)
> > fd = files->next_fd;
> >
> > - if (fd < fdt->max_fds)
> > + if (likely(fd < fdt->max_fds))
> > fd = find_next_fd(fdt, fd);
> >
> > + error = -EMFILE;
> > + if (unlikely(fd >= fdt->max_fds)) {
> > + error = expand_files(files, fd);
> > + if (error < 0)
> > + goto out;
> > + /*
> > + * If we needed to expand the fs array we
> > + * might have blocked - try again.
> > + */
> > + if (error)
> > + goto repeat;
> > + }
>
> So this ends up removing the expand_files() above the fd >= end check
> which means that you can end up expanding the files_struct even though
> the request fd is past the provided end. That seems odd. What's the
> reason for that reordering?
Yeah, not only that but also:
> > /*
> > * N.B. For clone tasks sharing a files structure, this test
> > * will limit the total number of files that can be opened.
> > */
> > - error = -EMFILE;
> > - if (fd >= end)
> > - goto out;
> > -
> > - error = expand_files(files, fd);
> > - if (error < 0)
> > + if (unlikely(fd >= end))
> > goto out;
We could then exit here with error set to 0 instead of -EMFILE. So this
needs a bit of work. But otherwise the patch looks good to me.
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
On 7/4/2024 6:11 PM, Jan Kara wrote:
> On Wed 03-07-24 16:34:49, Christian Brauner wrote:
>> On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote:
>>> alloc_fd() has a sanity check inside to make sure the struct file mapping to the
>>> allocated fd is NULL. Remove this sanity check since it can be assured by
>>> exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
>>> likely/unlikely and expand_file() call avoidance to reduce the work under
>>> file_lock.
>>>
>>> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
>>> Signed-off-by: Yu Ma <yu.ma@intel.com>
>>> ---
>>> fs/file.c | 38 ++++++++++++++++----------------------
>>> 1 file changed, 16 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/fs/file.c b/fs/file.c
>>> index a3b72aa64f11..5178b246e54b 100644
>>> --- a/fs/file.c
>>> +++ b/fs/file.c
>>> @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
>>> if (fd < files->next_fd)
>>> fd = files->next_fd;
>>>
>>> - if (fd < fdt->max_fds)
>>> + if (likely(fd < fdt->max_fds))
>>> fd = find_next_fd(fdt, fd);
>>>
>>> + error = -EMFILE;
>>> + if (unlikely(fd >= fdt->max_fds)) {
>>> + error = expand_files(files, fd);
>>> + if (error < 0)
>>> + goto out;
>>> + /*
>>> + * If we needed to expand the fs array we
>>> + * might have blocked - try again.
>>> + */
>>> + if (error)
>>> + goto repeat;
>>> + }
>> So this ends up removing the expand_files() above the fd >= end check
>> which means that you can end up expanding the files_struct even though
>> the request fd is past the provided end. That seems odd. What's the
>> reason for that reordering?
> Yeah, not only that but also:
>
>>> /*
>>> * N.B. For clone tasks sharing a files structure, this test
>>> * will limit the total number of files that can be opened.
>>> */
>>> - error = -EMFILE;
>>> - if (fd >= end)
>>> - goto out;
>>> -
>>> - error = expand_files(files, fd);
>>> - if (error < 0)
>>> + if (unlikely(fd >= end))
>>> goto out;
> We could then exit here with error set to 0 instead of -EMFILE. So this
> needs a bit of work. But otherwise the patch looks good to me.
>
> Honza
Do you mean that we return 0 here is fd >=end, I'm afraid that might
broke the original design of this function. And all the callers of it
are using ret < 0 for error handling, if ret=0, that should mean the fd
allocated is 0 ...
On Thu 04-07-24 22:45:32, Ma, Yu wrote:
>
> On 7/4/2024 6:11 PM, Jan Kara wrote:
> > On Wed 03-07-24 16:34:49, Christian Brauner wrote:
> > > On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote:
> > > > alloc_fd() has a sanity check inside to make sure the struct file mapping to the
> > > > allocated fd is NULL. Remove this sanity check since it can be assured by
> > > > exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
> > > > likely/unlikely and expand_file() call avoidance to reduce the work under
> > > > file_lock.
> > > >
> > > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> > > > Signed-off-by: Yu Ma <yu.ma@intel.com>
> > > > ---
> > > > fs/file.c | 38 ++++++++++++++++----------------------
> > > > 1 file changed, 16 insertions(+), 22 deletions(-)
> > > >
> > > > diff --git a/fs/file.c b/fs/file.c
> > > > index a3b72aa64f11..5178b246e54b 100644
> > > > --- a/fs/file.c
> > > > +++ b/fs/file.c
> > > > @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
> > > > if (fd < files->next_fd)
> > > > fd = files->next_fd;
> > > > - if (fd < fdt->max_fds)
> > > > + if (likely(fd < fdt->max_fds))
> > > > fd = find_next_fd(fdt, fd);
> > > > + error = -EMFILE;
> > > > + if (unlikely(fd >= fdt->max_fds)) {
> > > > + error = expand_files(files, fd);
> > > > + if (error < 0)
> > > > + goto out;
> > > > + /*
> > > > + * If we needed to expand the fs array we
> > > > + * might have blocked - try again.
> > > > + */
> > > > + if (error)
> > > > + goto repeat;
> > > > + }
> > > So this ends up removing the expand_files() above the fd >= end check
> > > which means that you can end up expanding the files_struct even though
> > > the request fd is past the provided end. That seems odd. What's the
> > > reason for that reordering?
> > Yeah, not only that but also:
> >
> > > > /*
> > > > * N.B. For clone tasks sharing a files structure, this test
> > > > * will limit the total number of files that can be opened.
> > > > */
> > > > - error = -EMFILE;
> > > > - if (fd >= end)
> > > > - goto out;
> > > > -
> > > > - error = expand_files(files, fd);
> > > > - if (error < 0)
> > > > + if (unlikely(fd >= end))
> > > > goto out;
> > We could then exit here with error set to 0 instead of -EMFILE. So this
> > needs a bit of work. But otherwise the patch looks good to me.
> >
> > Honza
>
> Do you mean that we return 0 here is fd >=end, I'm afraid that might broke
> the original design of this function. And all the callers of it are using
> ret < 0 for error handling, if ret=0, that should mean the fd allocated is 0
> ...
What I meant is that after your changes alloc_fd() could return 0 in fd >=
end case. It could happen if we went through expand_files() which then
returned 0. Anyway, please just fix the ordering of checks and we should be
fine.
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
On 7/3/2024 10:34 PM, Christian Brauner wrote:
> On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote:
>> alloc_fd() has a sanity check inside to make sure the struct file mapping to the
>> allocated fd is NULL. Remove this sanity check since it can be assured by
>> exisitng zero initilization and NULL set when recycling fd. Meanwhile, add
>> likely/unlikely and expand_file() call avoidance to reduce the work under
>> file_lock.
>>
>> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
>> Signed-off-by: Yu Ma <yu.ma@intel.com>
>> ---
>> fs/file.c | 38 ++++++++++++++++----------------------
>> 1 file changed, 16 insertions(+), 22 deletions(-)
>>
>> diff --git a/fs/file.c b/fs/file.c
>> index a3b72aa64f11..5178b246e54b 100644
>> --- a/fs/file.c
>> +++ b/fs/file.c
>> @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
>> if (fd < files->next_fd)
>> fd = files->next_fd;
>>
>> - if (fd < fdt->max_fds)
>> + if (likely(fd < fdt->max_fds))
>> fd = find_next_fd(fdt, fd);
>>
>> + error = -EMFILE;
>> + if (unlikely(fd >= fdt->max_fds)) {
>> + error = expand_files(files, fd);
>> + if (error < 0)
>> + goto out;
>> + /*
>> + * If we needed to expand the fs array we
>> + * might have blocked - try again.
>> + */
>> + if (error)
>> + goto repeat;
>> + }
> So this ends up removing the expand_files() above the fd >= end check
> which means that you can end up expanding the files_struct even though
> the request fd is past the provided end. That seems odd. What's the
> reason for that reordering?
Yes, you are right, thanks Christian. This incorrect reordering here is
due to historical versions with fast path inside. I'll update the order
back.
>> +
>> /*
>> * N.B. For clone tasks sharing a files structure, this test
>> * will limit the total number of files that can be opened.
>> */
>> - error = -EMFILE;
>> - if (fd >= end)
>> - goto out;
>> -
>> - error = expand_files(files, fd);
>> - if (error < 0)
>> + if (unlikely(fd >= end))
>> goto out;
>>
>> - /*
>> - * If we needed to expand the fs array we
>> - * might have blocked - try again.
>> - */
>> - if (error)
>> - goto repeat;
>> -
>> if (start <= files->next_fd)
>> files->next_fd = fd + 1;
>>
>> @@ -546,13 +547,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
>> else
>> __clear_close_on_exec(fd, fdt);
>> error = fd;
>> -#if 1
>> - /* Sanity check */
>> - if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
>> - printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
>> - rcu_assign_pointer(fdt->fd[fd], NULL);
>> - }
>> -#endif
>>
>> out:
>> spin_unlock(&files->file_lock);
>> @@ -618,7 +612,7 @@ void fd_install(unsigned int fd, struct file *file)
>> rcu_read_unlock_sched();
>> spin_lock(&files->file_lock);
>> fdt = files_fdtable(files);
>> - BUG_ON(fdt->fd[fd] != NULL);
>> + WARN_ON(fdt->fd[fd] != NULL);
>> rcu_assign_pointer(fdt->fd[fd], file);
>> spin_unlock(&files->file_lock);
>> return;
>> --
>> 2.43.0
>>
© 2016 - 2026 Red Hat, Inc.