Split out the insertion of pages so it can be done outside of the alloc->mutex,
in a separate binder_get_page_range() routine. Since this is no longer
serialized with other requests, we need to make sure we look at the full
range of pages for this buffer, including those shared with neighboring
buffers. The insertion of pages into the vma is still serialized with
the mmap write lock.
Besides avoiding unnecessary nested locking, this helps prepare for
switching the alloc->mutex to a spinlock_t in subsequent patches.
Signed-off-by: Carlos Llamas <cmllamas@google.com>
---
drivers/android/binder_alloc.c | 85 ++++++++++++++++++++++------------
1 file changed, 56 insertions(+), 29 deletions(-)
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 7e0af1786b20..e739be7f2dd4 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -194,6 +194,9 @@ static void binder_free_page_range(struct binder_alloc *alloc,
index = (page_addr - alloc->buffer) / PAGE_SIZE;
page = &alloc->pages[index];
+ if (!page->page_ptr)
+ continue;
+
trace_binder_free_lru_start(alloc, index);
ret = list_lru_add(&binder_alloc_lru, &page->lru);
@@ -214,6 +217,9 @@ static int binder_get_user_page_remote(struct binder_alloc *alloc,
return -ESRCH;
mmap_write_lock(alloc->mm);
+ if (lru_page->page_ptr)
+ goto out;
+
if (!alloc->vma) {
pr_err("%d: %s failed, no vma\n", alloc->pid, __func__);
ret = -ESRCH;
@@ -236,32 +242,64 @@ static int binder_get_user_page_remote(struct binder_alloc *alloc,
goto out;
}
- lru_page->page_ptr = page;
+ /* mark page insertion complete and safe to acquire */
+ smp_store_release(&lru_page->page_ptr, page);
out:
mmap_write_unlock(alloc->mm);
mmput_async(alloc->mm);
return ret;
}
-static int binder_allocate_page_range(struct binder_alloc *alloc,
- unsigned long start, unsigned long end)
+/* The range of pages should include those shared with other buffers */
+static int binder_get_page_range(struct binder_alloc *alloc,
+ unsigned long start, unsigned long end)
{
struct binder_lru_page *page;
unsigned long page_addr;
binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
- "%d: allocate pages %lx-%lx\n",
+ "%d: get pages %lx-%lx\n",
alloc->pid, start, end);
- if (end <= start)
- return 0;
+ for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
+ unsigned long index;
+ int ret;
+
+ index = (page_addr - alloc->buffer) / PAGE_SIZE;
+ page = &alloc->pages[index];
+
+ /* check if page insertion is marked complete by release */
+ if (smp_load_acquire(&page->page_ptr))
+ continue;
+
+ trace_binder_alloc_page_start(alloc, index);
+
+ ret = binder_get_user_page_remote(alloc, page, page_addr);
+ if (ret)
+ return ret;
+
+ trace_binder_alloc_page_end(alloc, index);
+ }
+
+ return 0;
+}
+
+/* The range of pages should exclude those shared with other buffers */
+static void binder_allocate_page_range(struct binder_alloc *alloc,
+ unsigned long start, unsigned long end)
+{
+ struct binder_lru_page *page;
+ unsigned long page_addr;
+
+ binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
+ "%d: allocate pages %lx-%lx\n",
+ alloc->pid, start, end);
trace_binder_update_page_range(alloc, true, start, end);
for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
unsigned long index;
bool on_lru;
- int ret;
index = (page_addr - alloc->buffer) / PAGE_SIZE;
page = &alloc->pages[index];
@@ -276,21 +314,9 @@ static int binder_allocate_page_range(struct binder_alloc *alloc,
continue;
}
- trace_binder_alloc_page_start(alloc, index);
-
- ret = binder_get_user_page_remote(alloc, page, page_addr);
- if (ret) {
- binder_free_page_range(alloc, start, page_addr);
- return ret;
- }
-
if (index + 1 > alloc->pages_high)
alloc->pages_high = index + 1;
-
- trace_binder_alloc_page_end(alloc, index);
}
-
- return 0;
}
static inline void binder_alloc_set_vma(struct binder_alloc *alloc,
@@ -410,7 +436,6 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
unsigned long has_page_addr;
unsigned long end_page_addr;
size_t buffer_size;
- int ret;
if (is_async &&
alloc->free_async_space < size + sizeof(struct binder_buffer)) {
@@ -453,18 +478,14 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
"%d: binder_alloc_buf size %zd got buffer %pK size %zd\n",
alloc->pid, size, buffer, buffer_size);
- has_page_addr = (buffer->user_data + buffer_size) & PAGE_MASK;
WARN_ON(n && buffer_size != size);
+
+ has_page_addr = (buffer->user_data + buffer_size) & PAGE_MASK;
end_page_addr = PAGE_ALIGN(buffer->user_data + size);
if (end_page_addr > has_page_addr)
end_page_addr = has_page_addr;
- ret = binder_allocate_page_range(alloc, PAGE_ALIGN(buffer->user_data),
- end_page_addr);
- if (ret) {
- buffer = ERR_PTR(ret);
- goto out;
- }
-
+ binder_allocate_page_range(alloc, PAGE_ALIGN(buffer->user_data),
+ end_page_addr);
if (buffer_size != size) {
new_buffer->user_data = buffer->user_data + size;
list_add(&new_buffer->entry, &buffer->entry);
@@ -491,7 +512,6 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
buffer->oneway_spam_suspect = true;
}
-out:
/* discard possibly unused new_buffer */
kfree(new_buffer);
return buffer;
@@ -520,6 +540,7 @@ struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
{
struct binder_buffer *buffer, *next;
size_t size;
+ int ret;
/* Check binder_alloc is fully initialized */
if (!binder_alloc_get_vma(alloc)) {
@@ -564,6 +585,12 @@ struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
buffer->extra_buffers_size = extra_buffers_size;
buffer->pid = current->tgid;
+ ret = binder_get_page_range(alloc, buffer->user_data & PAGE_MASK,
+ PAGE_ALIGN(buffer->user_data + size));
+ if (ret) {
+ binder_alloc_free_buf(alloc, buffer);
+ buffer = ERR_PTR(ret);
+ }
out:
return buffer;
}
--
2.42.0.869.gea05f2083d-goog
Carlos Llamas <cmllamas@google.com> writes:
> Split out the insertion of pages so it can be done outside of the alloc->mutex,
> in a separate binder_get_page_range() routine. Since this is no longer
> serialized with other requests, we need to make sure we look at the full
> range of pages for this buffer, including those shared with neighboring
> buffers. The insertion of pages into the vma is still serialized with
> the mmap write lock.
>
> Besides avoiding unnecessary nested locking, this helps prepare for
> switching the alloc->mutex to a spinlock_t in subsequent patches.
>
> Signed-off-by: Carlos Llamas <cmllamas@google.com>
This is rather complex, so it could use more comments. However, as far
as I can tell, the change is correct.
> +/* The range of pages should include those shared with other buffers */
> +static int binder_get_page_range(struct binder_alloc *alloc,
> + unsigned long start, unsigned long end)
> [snip]
> +/* The range of pages should exclude those shared with other buffers */
> +static void binder_allocate_page_range(struct binder_alloc *alloc,
> + unsigned long start, unsigned long end)
I would really like a comment on each function explaining that:
* The binder_allocate_page_range function ensures that existing pages
will not be reclaimed by the shrinker.
* The binder_get_page_range function ensures that missing pages are
allocated and inserted.
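For example (just a sketch, the exact wording is up to you):

  /*
   * binder_allocate_page_range() - keep existing pages from being reclaimed
   *
   * Takes the resident pages in [start, end) off the binder LRU so the
   * shrinker cannot reclaim them while the buffer is in use. Does not
   * allocate or insert anything; missing pages are installed later,
   * outside the alloc->mutex.
   */

  /*
   * binder_get_page_range() - allocate and insert any missing pages
   *
   * Ensures every page in [start, end), including pages shared with
   * neighboring buffers, has been allocated and inserted into the vma.
   * Insertion is serialized only by the mmap write lock.
   */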
I also don't think the names are great. The function with "allocate" in
its name is the one that doesn't perform any allocations.
> mmap_write_lock(alloc->mm);
> + if (lru_page->page_ptr)
> + goto out;
Another comment that I'd like to see somewhere is one that says
something along these lines:
Multiple processes may call `binder_get_user_page_remote` on the
same page in parallel. When this happens, one of them will allocate
the page and insert it, and the other process will use the mmap
write lock to wait for the insertion to complete. This means that we
can't use a mmap read lock here.
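For example, right at the new check (sketch only):

  mmap_write_lock(alloc->mm);

  /*
   * Multiple processes may race to install the same page. The loser
   * takes the mmap write lock here and thereby waits for the winner's
   * insertion to finish before bailing out, which is why a mmap read
   * lock is not enough on this path.
   */
  if (lru_page->page_ptr)
          goto out;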
> + /* mark page insertion complete and safe to acquire */
> + smp_store_release(&lru_page->page_ptr, page);
> [snip]
> + /* check if page insertion is marked complete by release */
> + if (smp_load_acquire(&page->page_ptr))
> + continue;
We already discussed this when I asked you to make this an acquire /
release operation so that it isn't racy, but it could use a comment
explaining its purpose.
> mmap_write_lock(alloc->mm);
> + if (lru_page->page_ptr)
> + goto out;
> +
> if (!alloc->vma) {
> pr_err("%d: %s failed, no vma\n", alloc->pid, __func__);
> ret = -ESRCH;
> goto out;
> }
>
> page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
> if (!page) {
> pr_err("%d: failed to allocate page\n", alloc->pid);
> ret = -ENOMEM;
> goto out;
> }
Maybe it would be worth allocating the page before taking the mmap
write lock? It has the disadvantage that you may have to immediately
deallocate it if we trigger the `if (lru_page->page_ptr) goto out`
branch, but that shouldn't happen that often, and it would reduce the
amount of time we spend holding the mmap write lock.
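Roughly (untested sketch; the early error label is hypothetical):

  /* untested sketch: allocate before taking the mmap write lock */
  page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
  if (!page) {
          pr_err("%d: failed to allocate page\n", alloc->pid);
          ret = -ENOMEM;
          goto err_no_lock;       /* hypothetical label, must skip the unlock */
  }

  mmap_write_lock(alloc->mm);
  if (lru_page->page_ptr) {
          /* lost the race: the page was installed while we allocated */
          __free_page(page);
          goto out;
  }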
Alice
On Tue, Nov 07, 2023 at 09:08:43AM +0000, Alice Ryhl wrote:
> I would really like a comment on each function explaining that:
>
> * The binder_allocate_page_range function ensures that existing pages
> will not be reclaimed by the shrinker.
> * The binder_get_page_range function ensures that missing pages are
> allocated and inserted.
Ok, I think I'd rather go for better naming than compensating with
comments, so I came up with the following names:
- binder_lru_freelist_{add,del}()
- binder_install_buffer_pages()
There will be more details in v2. The new names give a clear
separation of the scope of these functions.
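Roughly, the split would look like this (signatures are only a rough
guess, details in v2):

  /* replaces binder_free_page_range(): return pages to the shrinker's freelist */
  static void binder_lru_freelist_add(struct binder_alloc *alloc,
                                      unsigned long start, unsigned long end);

  /* replaces binder_allocate_page_range(): take pages off the freelist */
  static void binder_lru_freelist_del(struct binder_alloc *alloc,
                                      unsigned long start, unsigned long end);

  /* replaces binder_get_page_range(): allocate and insert missing pages */
  static int binder_install_buffer_pages(struct binder_alloc *alloc,
                                         unsigned long start, unsigned long end);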
> > mmap_write_lock(alloc->mm);
> > + if (lru_page->page_ptr)
> > + goto out;
>
> Another comment that I'd like to see somewhere is one that says
> something along these lines:
>
> Multiple processes may call `binder_get_user_page_remote` on the
> same page in parallel. When this happens, one of them will allocate
> the page and insert it, and the other process will use the mmap
> write lock to wait for the insertion to complete. This means that we
> can't use a mmap read lock here.
>
I've added a shorter version of this to v2, thanks.
> > + /* mark page insertion complete and safe to acquire */
> > + smp_store_release(&lru_page->page_ptr, page);
> > [snip]
> > + /* check if page insertion is marked complete by release */
> > + if (smp_load_acquire(&page->page_ptr))
> > + continue;
>
> We already discussed this when I asked you to make this an acquire /
> release operation so that it isn't racy, but it could use a comment
> explaining its purpose.
I've wrapped these calls into inline functions with better names in v2.
The purpose should now be evident.
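Something along these lines (a sketch; names and details may still
change in v2):

  static inline void binder_set_installed_page(struct binder_lru_page *lru_page,
                                               struct page *page)
  {
          /* Pairs with the acquire in binder_get_installed_page() */
          smp_store_release(&lru_page->page_ptr, page);
  }

  static inline struct page *binder_get_installed_page(struct binder_lru_page *lru_page)
  {
          /* Pairs with the release in binder_set_installed_page() */
          return smp_load_acquire(&lru_page->page_ptr);
  }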
>
> > mmap_write_lock(alloc->mm);
> > + if (lru_page->page_ptr)
> > + goto out;
> > +
> > if (!alloc->vma) {
> > pr_err("%d: %s failed, no vma\n", alloc->pid, __func__);
> > ret = -ESRCH;
> > goto out;
> > }
> >
> > page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
> > if (!page) {
> > pr_err("%d: failed to allocate page\n", alloc->pid);
> > ret = -ENOMEM;
> > goto out;
> > }
>
> Maybe it would be worth to allocate the page before taking the mmap
> write lock? It has the disadvantage that you may have to immediately
> deallocate it if we trigger the `if (lru_page->page_ptr) goto out`
> branch, but that shouldn't happen that often, and it would reduce the
> amount of time we spend holding the mmap write lock.
If we sleep in alloc_page(), then chances are that other tasks
allocating more pages will create additional memory pressure. In some
cases this would be unnecessary (e.g. if it's the same page). I do think
this could happen often, since buffer requests tend to be < PAGE_SIZE
and adjacent too. I'll look into this in more detail and send a
follow-up patch if needed. Thanks!
--
Carlos Llamas