[PATCH] futex: Use a folio instead of a page

Matthew Wilcox (Oracle) posted 1 patch 2 years, 3 months ago
There is a newer version of this series
kernel/futex/core.c | 67 ++++++++++++++++++++++-----------------------
1 file changed, 33 insertions(+), 34 deletions(-)
[PATCH] futex: Use a folio instead of a page
Posted by Matthew Wilcox (Oracle) 2 years, 3 months ago
The futex code already handles compound pages correctly, but using a
folio lets us tell the compiler that we already have the head page and
it doesn't need to call compound_head() again.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 kernel/futex/core.c | 67 ++++++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index f10587d1d481..d1d7b3c175a4 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -222,7 +222,8 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct page *page, *tail;
+	struct page *page;
+	struct folio *folio;
 	struct address_space *mapping;
 	int err, ro = 0;
 
@@ -273,54 +274,52 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
 		err = 0;
 
 	/*
-	 * The treatment of mapping from this point on is critical. The page
-	 * lock protects many things but in this context the page lock
+	 * The treatment of mapping from this point on is critical. The folio
+	 * lock protects many things but in this context the folio lock
 	 * stabilizes mapping, prevents inode freeing in the shared
 	 * file-backed region case and guards against movement to swap cache.
 	 *
-	 * Strictly speaking the page lock is not needed in all cases being
-	 * considered here and page lock forces unnecessarily serialization
+	 * Strictly speaking the folio lock is not needed in all cases being
+	 * considered here and folio lock forces unnecessarily serialization.
 	 * From this point on, mapping will be re-verified if necessary and
-	 * page lock will be acquired only if it is unavoidable
+	 * folio lock will be acquired only if it is unavoidable
 	 *
-	 * Mapping checks require the head page for any compound page so the
-	 * head page and mapping is looked up now. For anonymous pages, it
-	 * does not matter if the page splits in the future as the key is
-	 * based on the address. For filesystem-backed pages, the tail is
-	 * required as the index of the page determines the key. For
-	 * base pages, there is no tail page and tail == page.
+	 * Mapping checks require the folio so it is looked up now. For
+	 * anonymous pages, it does not matter if the folio is split
+	 * in the future as the key is based on the address. For
+	 * filesystem-backed pages, the precise page is required as the
+	 * index of the page determines the key.
 	 */
-	tail = page;
-	page = compound_head(page);
-	mapping = READ_ONCE(page->mapping);
+	folio = page_folio(page);
+	mapping = READ_ONCE(folio->mapping);
 
 	/*
-	 * If page->mapping is NULL, then it cannot be a PageAnon
+	 * If folio->mapping is NULL, then it cannot be an anonymous
 	 * page; but it might be the ZERO_PAGE or in the gate area or
 	 * in a special mapping (all cases which we are happy to fail);
 	 * or it may have been a good file page when get_user_pages_fast
 	 * found it, but truncated or holepunched or subjected to
-	 * invalidate_complete_page2 before we got the page lock (also
+	 * invalidate_complete_page2 before we got the folio lock (also
 	 * cases which we are happy to fail).  And we hold a reference,
 	 * so refcount care in invalidate_inode_page's remove_mapping
 	 * prevents drop_caches from setting mapping to NULL beneath us.
 	 *
 	 * The case we do have to guard against is when memory pressure made
 	 * shmem_writepage move it from filecache to swapcache beneath us:
-	 * an unlikely race, but we do need to retry for page->mapping.
+	 * an unlikely race, but we do need to retry for folio->mapping.
 	 */
 	if (unlikely(!mapping)) {
 		int shmem_swizzled;
 
 		/*
-		 * Page lock is required to identify which special case above
-		 * applies. If this is really a shmem page then the page lock
+		 * Folio lock is required to identify which special case above
+		 * applies. If this is really a shmem page then the folio lock
 		 * will prevent unexpected transitions.
 		 */
-		lock_page(page);
-		shmem_swizzled = PageSwapCache(page) || page->mapping;
-		unlock_page(page);
-		put_page(page);
+		folio_lock(folio);
+		shmem_swizzled = folio_test_swapcache(folio) || folio->mapping;
+		folio_unlock(folio);
+		folio_put(folio);
 
 		if (shmem_swizzled)
 			goto again;
@@ -331,14 +330,14 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
 	/*
 	 * Private mappings are handled in a simple way.
 	 *
-	 * If the futex key is stored on an anonymous page, then the associated
+	 * If the futex key is stored in anonymous memory, then the associated
 	 * object is the mm which is implicitly pinned by the calling process.
 	 *
 	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
 	 * it's a read-only handle, it's expected that futexes attach to
 	 * the object not the particular process.
 	 */
-	if (PageAnon(page)) {
+	if (folio_test_anon(folio)) {
 		/*
 		 * A RO anonymous page will never change and thus doesn't make
 		 * sense for futex operations.
@@ -357,10 +356,10 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
 
 		/*
 		 * The associated futex object in this case is the inode and
-		 * the page->mapping must be traversed. Ordinarily this should
-		 * be stabilised under page lock but it's not strictly
+		 * the folio->mapping must be traversed. Ordinarily this should
+		 * be stabilised under folio lock but it's not strictly
 		 * necessary in this case as we just want to pin the inode, not
-		 * update the radix tree or anything like that.
+		 * update i_pages or anything like that.
 		 *
 		 * The RCU read lock is taken as the inode is finally freed
 		 * under RCU. If the mapping still matches expectations then the
@@ -368,9 +367,9 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
 		 */
 		rcu_read_lock();
 
-		if (READ_ONCE(page->mapping) != mapping) {
+		if (READ_ONCE(folio->mapping) != mapping) {
 			rcu_read_unlock();
-			put_page(page);
+			folio_put(folio);
 
 			goto again;
 		}
@@ -378,19 +377,19 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
 		inode = READ_ONCE(mapping->host);
 		if (!inode) {
 			rcu_read_unlock();
-			put_page(page);
+			folio_put(folio);
 
 			goto again;
 		}
 
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 		key->shared.i_seq = get_inode_sequence_number(inode);
-		key->shared.pgoff = page_to_pgoff(tail);
+		key->shared.pgoff = folio->index + folio_page_idx(folio, page);
 		rcu_read_unlock();
 	}
 
 out:
-	put_page(page);
+	folio_put(folio);
 	return err;
 }
 
-- 
2.40.1
Re: [PATCH] futex: Use a folio instead of a page
Posted by Peter Zijlstra 2 years, 3 months ago
On Mon, Aug 21, 2023 at 03:22:07PM +0100, Matthew Wilcox (Oracle) wrote:
> The futex code already handles compound pages correctly, but using a
> folio lets us tell the compiler that we already have the head page and
> it doesn't need to call compound_head() again.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Acked-by; Peter Zijlstra (Intel) <peterz@infradead.org>

> ---
>  kernel/futex/core.c | 67 ++++++++++++++++++++++-----------------------
>  1 file changed, 33 insertions(+), 34 deletions(-)
> 
> diff --git a/kernel/futex/core.c b/kernel/futex/core.c
> index f10587d1d481..d1d7b3c175a4 100644
> --- a/kernel/futex/core.c
> +++ b/kernel/futex/core.c
> @@ -222,7 +222,8 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
>  {
>  	unsigned long address = (unsigned long)uaddr;
>  	struct mm_struct *mm = current->mm;
> -	struct page *page, *tail;
> +	struct page *page;
> +	struct folio *folio;
>  	struct address_space *mapping;
>  	int err, ro = 0;
>  
> @@ -273,54 +274,52 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
>  		err = 0;
>  
>  	/*
> -	 * The treatment of mapping from this point on is critical. The page
> -	 * lock protects many things but in this context the page lock
> +	 * The treatment of mapping from this point on is critical. The folio
> +	 * lock protects many things but in this context the folio lock
>  	 * stabilizes mapping, prevents inode freeing in the shared
>  	 * file-backed region case and guards against movement to swap cache.
>  	 *
> -	 * Strictly speaking the page lock is not needed in all cases being
> -	 * considered here and page lock forces unnecessarily serialization
> +	 * Strictly speaking the folio lock is not needed in all cases being
> +	 * considered here and folio lock forces unnecessarily serialization.
>  	 * From this point on, mapping will be re-verified if necessary and
> -	 * page lock will be acquired only if it is unavoidable
> +	 * folio lock will be acquired only if it is unavoidable
>  	 *
> -	 * Mapping checks require the head page for any compound page so the
> -	 * head page and mapping is looked up now. For anonymous pages, it
> -	 * does not matter if the page splits in the future as the key is
> -	 * based on the address. For filesystem-backed pages, the tail is
> -	 * required as the index of the page determines the key. For
> -	 * base pages, there is no tail page and tail == page.
> +	 * Mapping checks require the folio so it is looked up now. For
> +	 * anonymous pages, it does not matter if the folio is split
> +	 * in the future as the key is based on the address. For
> +	 * filesystem-backed pages, the precise page is required as the
> +	 * index of the page determines the key.
>  	 */
> -	tail = page;
> -	page = compound_head(page);
> -	mapping = READ_ONCE(page->mapping);
> +	folio = page_folio(page);
> +	mapping = READ_ONCE(folio->mapping);
>  
>  	/*
> -	 * If page->mapping is NULL, then it cannot be a PageAnon
> +	 * If folio->mapping is NULL, then it cannot be an anonymous
>  	 * page; but it might be the ZERO_PAGE or in the gate area or
>  	 * in a special mapping (all cases which we are happy to fail);
>  	 * or it may have been a good file page when get_user_pages_fast
>  	 * found it, but truncated or holepunched or subjected to
> -	 * invalidate_complete_page2 before we got the page lock (also
> +	 * invalidate_complete_page2 before we got the folio lock (also
>  	 * cases which we are happy to fail).  And we hold a reference,
>  	 * so refcount care in invalidate_inode_page's remove_mapping
>  	 * prevents drop_caches from setting mapping to NULL beneath us.
>  	 *
>  	 * The case we do have to guard against is when memory pressure made
>  	 * shmem_writepage move it from filecache to swapcache beneath us:
> -	 * an unlikely race, but we do need to retry for page->mapping.
> +	 * an unlikely race, but we do need to retry for folio->mapping.
>  	 */
>  	if (unlikely(!mapping)) {
>  		int shmem_swizzled;
>  
>  		/*
> -		 * Page lock is required to identify which special case above
> -		 * applies. If this is really a shmem page then the page lock
> +		 * Folio lock is required to identify which special case above
> +		 * applies. If this is really a shmem page then the folio lock
>  		 * will prevent unexpected transitions.
>  		 */
> -		lock_page(page);
> -		shmem_swizzled = PageSwapCache(page) || page->mapping;
> -		unlock_page(page);
> -		put_page(page);
> +		folio_lock(folio);
> +		shmem_swizzled = folio_test_swapcache(folio) || folio->mapping;
> +		folio_unlock(folio);
> +		folio_put(folio);
>  
>  		if (shmem_swizzled)
>  			goto again;
> @@ -331,14 +330,14 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
>  	/*
>  	 * Private mappings are handled in a simple way.
>  	 *
> -	 * If the futex key is stored on an anonymous page, then the associated
> +	 * If the futex key is stored in anonymous memory, then the associated
>  	 * object is the mm which is implicitly pinned by the calling process.
>  	 *
>  	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
>  	 * it's a read-only handle, it's expected that futexes attach to
>  	 * the object not the particular process.
>  	 */
> -	if (PageAnon(page)) {
> +	if (folio_test_anon(folio)) {
>  		/*
>  		 * A RO anonymous page will never change and thus doesn't make
>  		 * sense for futex operations.
> @@ -357,10 +356,10 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
>  
>  		/*
>  		 * The associated futex object in this case is the inode and
> -		 * the page->mapping must be traversed. Ordinarily this should
> -		 * be stabilised under page lock but it's not strictly
> +		 * the folio->mapping must be traversed. Ordinarily this should
> +		 * be stabilised under folio lock but it's not strictly
>  		 * necessary in this case as we just want to pin the inode, not
> -		 * update the radix tree or anything like that.
> +		 * update i_pages or anything like that.
>  		 *
>  		 * The RCU read lock is taken as the inode is finally freed
>  		 * under RCU. If the mapping still matches expectations then the
> @@ -368,9 +367,9 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
>  		 */
>  		rcu_read_lock();
>  
> -		if (READ_ONCE(page->mapping) != mapping) {
> +		if (READ_ONCE(folio->mapping) != mapping) {
>  			rcu_read_unlock();
> -			put_page(page);
> +			folio_put(folio);
>  
>  			goto again;
>  		}
> @@ -378,19 +377,19 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
>  		inode = READ_ONCE(mapping->host);
>  		if (!inode) {
>  			rcu_read_unlock();
> -			put_page(page);
> +			folio_put(folio);
>  
>  			goto again;
>  		}
>  
>  		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
>  		key->shared.i_seq = get_inode_sequence_number(inode);
> -		key->shared.pgoff = page_to_pgoff(tail);
> +		key->shared.pgoff = folio->index + folio_page_idx(folio, page);
>  		rcu_read_unlock();
>  	}
>  
>  out:
> -	put_page(page);
> +	folio_put(folio);
>  	return err;
>  }
>  
> -- 
> 2.40.1
>
[tip: locking/core] futex: Use a folio instead of a page
Posted by tip-bot2 for Matthew Wilcox (Oracle) 2 years, 3 months ago
The following commit has been merged into the locking/core branch of tip:

Commit-ID:     e35a6cf1cc343d720ad235f678f1cd2a9876b777
Gitweb:        https://git.kernel.org/tip/e35a6cf1cc343d720ad235f678f1cd2a9876b777
Author:        Matthew Wilcox (Oracle) <willy@infradead.org>
AuthorDate:    Mon, 21 Aug 2023 15:22:07 +01:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Thu, 14 Sep 2023 00:03:09 +02:00

futex: Use a folio instead of a page

The futex code already handles compound pages correctly, but using a folio
tells the compiler that there is already a reference to the head page and
it doesn't need to call compound_head() again.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230821142207.2537124-1-willy@infradead.org

---
 kernel/futex/core.c | 67 +++++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 514e458..adf7e2c 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -222,7 +222,8 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct page *page, *tail;
+	struct page *page;
+	struct folio *folio;
 	struct address_space *mapping;
 	int err, ro = 0;
 
@@ -273,54 +274,52 @@ again:
 		err = 0;
 
 	/*
-	 * The treatment of mapping from this point on is critical. The page
-	 * lock protects many things but in this context the page lock
+	 * The treatment of mapping from this point on is critical. The folio
+	 * lock protects many things but in this context the folio lock
 	 * stabilizes mapping, prevents inode freeing in the shared
 	 * file-backed region case and guards against movement to swap cache.
 	 *
-	 * Strictly speaking the page lock is not needed in all cases being
-	 * considered here and page lock forces unnecessarily serialization
+	 * Strictly speaking the folio lock is not needed in all cases being
+	 * considered here and folio lock forces unnecessarily serialization.
 	 * From this point on, mapping will be re-verified if necessary and
-	 * page lock will be acquired only if it is unavoidable
+	 * folio lock will be acquired only if it is unavoidable
 	 *
-	 * Mapping checks require the head page for any compound page so the
-	 * head page and mapping is looked up now. For anonymous pages, it
-	 * does not matter if the page splits in the future as the key is
-	 * based on the address. For filesystem-backed pages, the tail is
-	 * required as the index of the page determines the key. For
-	 * base pages, there is no tail page and tail == page.
+	 * Mapping checks require the folio so it is looked up now. For
+	 * anonymous pages, it does not matter if the folio is split
+	 * in the future as the key is based on the address. For
+	 * filesystem-backed pages, the precise page is required as the
+	 * index of the page determines the key.
 	 */
-	tail = page;
-	page = compound_head(page);
-	mapping = READ_ONCE(page->mapping);
+	folio = page_folio(page);
+	mapping = READ_ONCE(folio->mapping);
 
 	/*
-	 * If page->mapping is NULL, then it cannot be a PageAnon
+	 * If folio->mapping is NULL, then it cannot be an anonymous
 	 * page; but it might be the ZERO_PAGE or in the gate area or
 	 * in a special mapping (all cases which we are happy to fail);
 	 * or it may have been a good file page when get_user_pages_fast
 	 * found it, but truncated or holepunched or subjected to
-	 * invalidate_complete_page2 before we got the page lock (also
+	 * invalidate_complete_page2 before we got the folio lock (also
 	 * cases which we are happy to fail).  And we hold a reference,
 	 * so refcount care in invalidate_inode_page's remove_mapping
 	 * prevents drop_caches from setting mapping to NULL beneath us.
 	 *
 	 * The case we do have to guard against is when memory pressure made
 	 * shmem_writepage move it from filecache to swapcache beneath us:
-	 * an unlikely race, but we do need to retry for page->mapping.
+	 * an unlikely race, but we do need to retry for folio->mapping.
 	 */
 	if (unlikely(!mapping)) {
 		int shmem_swizzled;
 
 		/*
-		 * Page lock is required to identify which special case above
-		 * applies. If this is really a shmem page then the page lock
+		 * Folio lock is required to identify which special case above
+		 * applies. If this is really a shmem page then the folio lock
 		 * will prevent unexpected transitions.
 		 */
-		lock_page(page);
-		shmem_swizzled = PageSwapCache(page) || page->mapping;
-		unlock_page(page);
-		put_page(page);
+		folio_lock(folio);
+		shmem_swizzled = folio_test_swapcache(folio) || folio->mapping;
+		folio_unlock(folio);
+		folio_put(folio);
 
 		if (shmem_swizzled)
 			goto again;
@@ -331,14 +330,14 @@ again:
 	/*
 	 * Private mappings are handled in a simple way.
 	 *
-	 * If the futex key is stored on an anonymous page, then the associated
+	 * If the futex key is stored in anonymous memory, then the associated
 	 * object is the mm which is implicitly pinned by the calling process.
 	 *
 	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
 	 * it's a read-only handle, it's expected that futexes attach to
 	 * the object not the particular process.
 	 */
-	if (PageAnon(page)) {
+	if (folio_test_anon(folio)) {
 		/*
 		 * A RO anonymous page will never change and thus doesn't make
 		 * sense for futex operations.
@@ -357,10 +356,10 @@ again:
 
 		/*
 		 * The associated futex object in this case is the inode and
-		 * the page->mapping must be traversed. Ordinarily this should
-		 * be stabilised under page lock but it's not strictly
+		 * the folio->mapping must be traversed. Ordinarily this should
+		 * be stabilised under folio lock but it's not strictly
 		 * necessary in this case as we just want to pin the inode, not
-		 * update the radix tree or anything like that.
+		 * update i_pages or anything like that.
 		 *
 		 * The RCU read lock is taken as the inode is finally freed
 		 * under RCU. If the mapping still matches expectations then the
@@ -368,9 +367,9 @@ again:
 		 */
 		rcu_read_lock();
 
-		if (READ_ONCE(page->mapping) != mapping) {
+		if (READ_ONCE(folio->mapping) != mapping) {
 			rcu_read_unlock();
-			put_page(page);
+			folio_put(folio);
 
 			goto again;
 		}
@@ -378,19 +377,19 @@ again:
 		inode = READ_ONCE(mapping->host);
 		if (!inode) {
 			rcu_read_unlock();
-			put_page(page);
+			folio_put(folio);
 
 			goto again;
 		}
 
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 		key->shared.i_seq = get_inode_sequence_number(inode);
-		key->shared.pgoff = page_to_pgoff(tail);
+		key->shared.pgoff = folio->index + folio_page_idx(folio, page);
 		rcu_read_unlock();
 	}
 
 out:
-	put_page(page);
+	folio_put(folio);
 	return err;
 }