fs/pipe.c | 67 +++++++++++++++++++++++++++------------ include/linux/pipe_fs_i.h | 2 +- 2 files changed, 48 insertions(+), 21 deletions(-)
User data is kept in a circular buffer backed by pages allocated as
needed. Only having space for one spare is still prone to having to
resort to allocation / freeing.
In my testing this decreases page allocs by 60% during a -j 20 kernel
build.
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
fs/pipe.c | 67 +++++++++++++++++++++++++++------------
include/linux/pipe_fs_i.h | 2 +-
2 files changed, 48 insertions(+), 21 deletions(-)
diff --git a/fs/pipe.c b/fs/pipe.c
index 19a7948ab234..2508d14e8812 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -112,20 +112,47 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
pipe_lock(pipe2);
}
+static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
+{
+ struct page *page;
+
+ if (pipe->tmp_page[0]) {
+ page = pipe->tmp_page[0];
+ pipe->tmp_page[0] = NULL;
+ } else if (pipe->tmp_page[1]) {
+ page = pipe->tmp_page[1];
+ pipe->tmp_page[1] = NULL;
+ } else {
+ page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
+ }
+
+ return page;
+}
+
+static void anon_pipe_put_page(struct pipe_inode_info *pipe,
+ struct page *page)
+{
+ if (page_count(page) == 1) {
+ if (!pipe->tmp_page[0]) {
+ pipe->tmp_page[0] = page;
+ return;
+ }
+
+ if (!pipe->tmp_page[1]) {
+ pipe->tmp_page[1] = page;
+ return;
+ }
+ }
+
+ put_page(page);
+}
+
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct page *page = buf->page;
- /*
- * If nobody else uses this page, and we don't already have a
- * temporary page, let's keep track of it as a one-deep
- * allocation cache. (Otherwise just release our reference to it)
- */
- if (page_count(page) == 1 && !pipe->tmp_page)
- pipe->tmp_page = page;
- else
- put_page(page);
+ anon_pipe_put_page(pipe, page);
}
static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
@@ -493,27 +520,25 @@ anon_pipe_write(struct kiocb *iocb, struct iov_iter *from)
if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf;
- struct page *page = pipe->tmp_page;
+ struct page *page;
int copied;
- if (!page) {
- page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
- if (unlikely(!page)) {
- ret = ret ? : -ENOMEM;
- break;
- }
- pipe->tmp_page = page;
+ page = anon_pipe_get_page(pipe);
+ if (unlikely(!page)) {
+ if (!ret)
+ ret = -ENOMEM;
+ break;
}
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
+ anon_pipe_put_page(pipe, page);
if (!ret)
ret = -EFAULT;
break;
}
pipe->head = head + 1;
- pipe->tmp_page = NULL;
/* Insert it into the buffer array */
buf = &pipe->bufs[head & mask];
buf->page = page;
@@ -847,8 +872,10 @@ void free_pipe_info(struct pipe_inode_info *pipe)
if (pipe->watch_queue)
put_watch_queue(pipe->watch_queue);
#endif
- if (pipe->tmp_page)
- __free_page(pipe->tmp_page);
+ if (pipe->tmp_page[0])
+ __free_page(pipe->tmp_page[0]);
+ if (pipe->tmp_page[1])
+ __free_page(pipe->tmp_page[1]);
kfree(pipe->bufs);
kfree(pipe);
}
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 8ff23bf5a819..eb7994a1ff93 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -72,7 +72,7 @@ struct pipe_inode_info {
#ifdef CONFIG_WATCH_QUEUE
bool note_loss;
#endif
- struct page *tmp_page;
+ struct page *tmp_page[2];
struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers;
struct pipe_buffer *bufs;
--
2.43.0
I already had a lot of beer, can't read this patch, just one nit...
On 02/27, Mateusz Guzik wrote:
>
> User data is kept in a circular buffer backed by pages allocated as
> needed. Only having space for one spare is still prone to having to
> resort to allocation / freeing.
>
> In my testing this decreases page allocs by 60% during a -j 20 kernel
> build.
So this is performance improvement?
> +static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
> +{
> + struct page *page;
> +
> + if (pipe->tmp_page[0]) {
> + page = pipe->tmp_page[0];
> + pipe->tmp_page[0] = NULL;
> + } else if (pipe->tmp_page[1]) {
> + page = pipe->tmp_page[1];
> + pipe->tmp_page[1] = NULL;
> + } else {
> + page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> + }
> +
> + return page;
> +}
Perhaps something like
for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
if (pipe->tmp_page[i]) {
struct page *page = pipe->tmp_page[i];
pipe->tmp_page[i] = NULL;
return page;
}
}
return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
?
Same for anon_pipe_put_page() and free_pipe_info().
This avoids the code duplication and allows to change the size of
pipe->tmp_page[] array without other changes.
Oleg.
On Thu, Feb 27, 2025 at 10:59 PM Oleg Nesterov <oleg@redhat.com> wrote:
> > +static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
> > +{
> > + struct page *page;
> > +
> > + if (pipe->tmp_page[0]) {
> > + page = pipe->tmp_page[0];
> > + pipe->tmp_page[0] = NULL;
> > + } else if (pipe->tmp_page[1]) {
> > + page = pipe->tmp_page[1];
> > + pipe->tmp_page[1] = NULL;
> > + } else {
> > + page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> > + }
> > +
> > + return page;
> > +}
>
> Perhaps something like
>
> for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
> if (pipe->tmp_page[i]) {
> struct page *page = pipe->tmp_page[i];
> pipe->tmp_page[i] = NULL;
> return page;
> }
> }
>
> return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> ?
>
> Same for anon_pipe_put_page() and free_pipe_info().
>
> This avoids the code duplication and allows to change the size of
> pipe->tmp_page[] array without other changes.
>
I have almost no opinion one way or the other and I'm not going to
argue about this bit. I only note I don't expect there is a legit
reason to go beyond 2 pages here. As in if more is warranted, the
approach to baking the area should probably change.
I started with this being spelled out so that I have easier time
toggling the extra slot for testing.
That said, I don't know who counts as the pipe man today. I can do the
needful(tm) no problem.
--
Mateusz Guzik <mjguzik gmail.com>
On Thu, Feb 27, 2025 at 11:07:45PM +0100, Mateusz Guzik wrote:
> On Thu, Feb 27, 2025 at 10:59 PM Oleg Nesterov <oleg@redhat.com> wrote:
> > > +static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
> > > +{
> > > + struct page *page;
> > > +
> > > + if (pipe->tmp_page[0]) {
> > > + page = pipe->tmp_page[0];
> > > + pipe->tmp_page[0] = NULL;
> > > + } else if (pipe->tmp_page[1]) {
> > > + page = pipe->tmp_page[1];
> > > + pipe->tmp_page[1] = NULL;
> > > + } else {
> > > + page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> > > + }
> > > +
> > > + return page;
> > > +}
> >
> > Perhaps something like
> >
> > for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
> > if (pipe->tmp_page[i]) {
> > struct page *page = pipe->tmp_page[i];
> > pipe->tmp_page[i] = NULL;
> > return page;
> > }
> > }
> >
> > return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> > ?
> >
> > Same for anon_pipe_put_page() and free_pipe_info().
> >
> > This avoids the code duplication and allows to change the size of
> > pipe->tmp_page[] array without other changes.
> >
>
> I have almost no opinion one way or the other and I'm not going to
> argue about this bit. I only note I don't expect there is a legit
> reason to go beyond 2 pages here. As in if more is warranted, the
> approach to baking the area should probably change.
>
> I started with this being spelled out so that I have easier time
> toggling the extra slot for testing.
>
> That said, I don't know who counts as the pipe man today. I can do the
Linus or David should have the most detailed knowledge.
> needful(tm) no problem.
© 2016 - 2025 Red Hat, Inc.