[PATCH v17 44/47] dept: introduce APIs to set page usage and use subclasses_evt for the usage

Byungchul Park posted 47 patches 2 months, 2 weeks ago
There is a newer version of this series
[PATCH v17 44/47] dept: introduce APIs to set page usage and use subclasses_evt for the usage
Posted by Byungchul Park 2 months, 2 weeks ago
False positive reports have been observed since dept works with the
assumption that all the pages have the same dept class, but the class
should be split since the problematic call paths are different depending
on what the page is used for.

At least, ones in block device's address_space and ones in regular
file's address_space have exclusively different usages.

Thus, define usage candidates like:

   DEPT_PAGE_REGFILE_CACHE /* page in regular file's address_space */
   DEPT_PAGE_BDEV_CACHE    /* page in block device's address_space */
   DEPT_PAGE_DEFAULT       /* the others */

Introduce APIs to set each page's usage properly and make sure not to
interact between at least between DEPT_PAGE_REGFILE_CACHE and
DEPT_PAGE_BDEV_CACHE.  However, besides the exclusive usages, allow any
other combinations to interact to the other for example:

   PG_locked for DEPT_PAGE_DEFAULT page can wait for PG_locked for
   DEPT_PAGE_REGFILE_CACHE page and vice versa.

   PG_locked for DEPT_PAGE_DEFAULT page can wait for PG_locked for
   DEPT_PAGE_BDEV_CACHE page and vice versa.

   PG_locked for DEPT_PAGE_DEFAULT page can wait for PG_locked for
   DEPT_PAGE_DEFAULT page.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/dept.h       | 31 +++++++++++++++-
 include/linux/mm_types.h   |  1 +
 include/linux/page-flags.h | 76 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 104 insertions(+), 4 deletions(-)

diff --git a/include/linux/dept.h b/include/linux/dept.h
index 0ac13129f308..fbbc41048fac 100644
--- a/include/linux/dept.h
+++ b/include/linux/dept.h
@@ -21,8 +21,8 @@ struct task_struct;
 #define DEPT_MAX_WAIT_HIST		64
 #define DEPT_MAX_ECXT_HELD		48
 
-#define DEPT_MAX_SUBCLASSES		16
-#define DEPT_MAX_SUBCLASSES_EVT		2
+#define DEPT_MAX_SUBCLASSES		24
+#define DEPT_MAX_SUBCLASSES_EVT		3
 #define DEPT_MAX_SUBCLASSES_USR		(DEPT_MAX_SUBCLASSES / DEPT_MAX_SUBCLASSES_EVT)
 #define DEPT_MAX_SUBCLASSES_CACHE	2
 
@@ -390,6 +390,32 @@ struct dept_ext_wgen {
 	unsigned int wgen;
 };
 
+enum {
+	DEPT_PAGE_DEFAULT = 0,
+	DEPT_PAGE_REGFILE_CACHE,	/* regular file page cache */
+	DEPT_PAGE_BDEV_CACHE,		/* block device cache */
+	DEPT_PAGE_USAGE_NR,		/* nr of usages options */
+};
+
+#define DEPT_PAGE_USAGE_SHIFT 16
+#define DEPT_PAGE_USAGE_MASK ((1U << DEPT_PAGE_USAGE_SHIFT) - 1)
+#define DEPT_PAGE_USAGE_PENDING_MASK (DEPT_PAGE_USAGE_MASK << DEPT_PAGE_USAGE_SHIFT)
+
+/*
+ * Identify each page's usage type
+ */
+struct dept_page_usage {
+	/*
+	 * low 16 bits  : the current usage type
+	 * high 16 bits : usage type requested to be set
+	 *
+	 * Do not apply the type requested immediately but defer until
+	 * after clearing PG_locked bit of the folio or page e.g. by
+	 * folio_unlock().
+	 */
+	atomic_t type; /* Update and read atomically */
+};
+
 struct dept_event_site {
 	/*
 	 * event site name
@@ -562,6 +588,7 @@ extern void dept_hardirqs_off(void);
 struct dept_key { };
 struct dept_map { };
 struct dept_ext_wgen { };
+struct dept_page_usage { };
 struct dept_event_site { };
 
 #define DEPT_MAP_INITIALIZER(n, k) { }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5ebc565309af..8ccbb030500c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -224,6 +224,7 @@ struct page {
 	struct page *kmsan_shadow;
 	struct page *kmsan_origin;
 #endif
+	struct dept_page_usage usage;
 	struct dept_ext_wgen pg_locked_wgen;
 } _struct_page_alignment;
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d3c4954c4218..3fd3660ddc6f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -204,6 +204,68 @@ enum pageflags {
 
 extern struct dept_map pg_locked_map;
 
+static inline int dept_set_page_usage(struct page *p,
+		unsigned int new_type)
+{
+	unsigned int type = atomic_read(&p->usage.type);
+
+	if (WARN_ON_ONCE(new_type >= DEPT_PAGE_USAGE_NR))
+		return -1;
+
+	new_type <<= DEPT_PAGE_USAGE_SHIFT;
+retry:
+	new_type &= ~DEPT_PAGE_USAGE_MASK;
+	new_type |= type & DEPT_PAGE_USAGE_MASK;
+
+	if (!atomic_try_cmpxchg(&p->usage.type, &type, new_type))
+		goto retry;
+
+	return 0;
+}
+
+static inline int dept_reset_page_usage(struct page *p)
+{
+	return dept_set_page_usage(p, DEPT_PAGE_DEFAULT);
+}
+
+static inline void dept_update_page_usage(struct page *p)
+{
+	unsigned int type = atomic_read(&p->usage.type);
+	unsigned int new_type;
+
+retry:
+	new_type = type & DEPT_PAGE_USAGE_PENDING_MASK;
+	new_type >>= DEPT_PAGE_USAGE_SHIFT;
+	new_type |= type & DEPT_PAGE_USAGE_PENDING_MASK;
+
+	/*
+	 * Already updated by others.
+	 */
+	if (type == new_type)
+		return;
+
+	if (!atomic_try_cmpxchg(&p->usage.type, &type, new_type))
+		goto retry;
+}
+
+static inline unsigned long dept_event_flags(struct page *p, bool wait)
+{
+	unsigned int type;
+
+	type = atomic_read(&p->usage.type) & DEPT_PAGE_USAGE_MASK;
+
+	if (WARN_ON_ONCE(type >= DEPT_PAGE_USAGE_NR))
+		return 0;
+
+	/*
+	 * event
+	 */
+	if (!wait)
+		return 1UL << type;
+
+	return (1UL << DEPT_PAGE_DEFAULT) | (1UL << type);
+}
+
 /*
  * Place the following annotations in its suitable point in code:
  *
@@ -214,20 +276,28 @@ extern struct dept_map pg_locked_map;
 
 static inline void dept_page_set_bit(struct page *p, int bit_nr)
 {
+	dept_update_page_usage(p);
 	if (bit_nr == PG_locked)
 		dept_request_event(&pg_locked_map, &p->pg_locked_wgen);
 }
 
 static inline void dept_page_clear_bit(struct page *p, int bit_nr)
 {
+	unsigned long evt_f;
+
+	evt_f = dept_event_flags(p, false);
 	if (bit_nr == PG_locked)
-		dept_event(&pg_locked_map, 1UL, _RET_IP_, __func__, &p->pg_locked_wgen);
+		dept_event(&pg_locked_map, evt_f, _RET_IP_, __func__, &p->pg_locked_wgen);
 }
 
 static inline void dept_page_wait_on_bit(struct page *p, int bit_nr)
 {
+	unsigned long evt_f;
+
+	dept_update_page_usage(p);
+	evt_f = dept_event_flags(p, true);
 	if (bit_nr == PG_locked)
-		dept_wait(&pg_locked_map, 1UL, _RET_IP_, __func__, 0, -1L);
+		dept_wait(&pg_locked_map, evt_f, _RET_IP_, __func__, 0, -1L);
 }
 
 static inline void dept_folio_set_bit(struct folio *f, int bit_nr)
@@ -245,6 +315,8 @@ static inline void dept_folio_wait_on_bit(struct folio *f, int bit_nr)
 	dept_page_wait_on_bit(&f->page, bit_nr);
 }
 #else
+#define dept_set_page_usage(p, t)		do { } while (0)
+#define dept_reset_page_usage(p)		do { } while (0)
 #define dept_page_set_bit(p, bit_nr)		do { } while (0)
 #define dept_page_clear_bit(p, bit_nr)		do { } while (0)
 #define dept_page_wait_on_bit(p, bit_nr)	do { } while (0)
-- 
2.17.1
Re: [PATCH v17 44/47] dept: introduce APIs to set page usage and use subclasses_evt for the usage
Posted by Byungchul Park 4 weeks ago
On Thu, Oct 02, 2025 at 05:12:44PM +0900, Byungchul Park wrote:
> False positive reports have been observed since dept works with the
> assumption that all the pages have the same dept class, but the class
> should be split since the problematic call paths are different depending
> on what the page is used for.
> 
> At least, ones in block device's address_space and ones in regular
> file's address_space have exclusively different usages.
> 
> Thus, define usage candidates like:
> 
>    DEPT_PAGE_REGFILE_CACHE /* page in regular file's address_space */
>    DEPT_PAGE_BDEV_CACHE    /* page in block device's address_space */
>    DEPT_PAGE_DEFAULT       /* the others */

1. I'd like to annotate a page to DEPT_PAGE_REGFILE_CACHE when the page
   starts to be associated with a page cache for fs data.

2. And I'd like to annotate a page to DEPT_PAGE_BDEV_CACHE when the page
   starts to be associated with meta data of fs e.g. super block.

3. Lastly, I'd like to reset the annotated value if any, that has been
   set in the page, when the page ends the assoication with either page
   cache or meta block of fs e.g. freeing the page.

Can anyone suggest good places in code for the annotation 1, 2, 3?  It'd
be totally appreciated. :-)

	Byungchul

> Introduce APIs to set each page's usage properly and make sure not to
> interact between at least between DEPT_PAGE_REGFILE_CACHE and
> DEPT_PAGE_BDEV_CACHE.  However, besides the exclusive usages, allow any
> other combinations to interact to the other for example:
> 
>    PG_locked for DEPT_PAGE_DEFAULT page can wait for PG_locked for
>    DEPT_PAGE_REGFILE_CACHE page and vice versa.
> 
>    PG_locked for DEPT_PAGE_DEFAULT page can wait for PG_locked for
>    DEPT_PAGE_BDEV_CACHE page and vice versa.
> 
>    PG_locked for DEPT_PAGE_DEFAULT page can wait for PG_locked for
>    DEPT_PAGE_DEFAULT page.
> 
> Signed-off-by: Byungchul Park <byungchul@sk.com>
> ---
>  include/linux/dept.h       | 31 +++++++++++++++-
>  include/linux/mm_types.h   |  1 +
>  include/linux/page-flags.h | 76 +++++++++++++++++++++++++++++++++++++-
>  3 files changed, 104 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/dept.h b/include/linux/dept.h
> index 0ac13129f308..fbbc41048fac 100644
> --- a/include/linux/dept.h
> +++ b/include/linux/dept.h
> @@ -21,8 +21,8 @@ struct task_struct;
>  #define DEPT_MAX_WAIT_HIST		64
>  #define DEPT_MAX_ECXT_HELD		48
>  
> -#define DEPT_MAX_SUBCLASSES		16
> -#define DEPT_MAX_SUBCLASSES_EVT		2
> +#define DEPT_MAX_SUBCLASSES		24
> +#define DEPT_MAX_SUBCLASSES_EVT		3
>  #define DEPT_MAX_SUBCLASSES_USR		(DEPT_MAX_SUBCLASSES / DEPT_MAX_SUBCLASSES_EVT)
>  #define DEPT_MAX_SUBCLASSES_CACHE	2
>  
> @@ -390,6 +390,32 @@ struct dept_ext_wgen {
>  	unsigned int wgen;
>  };
>  
> +enum {
> +	DEPT_PAGE_DEFAULT = 0,
> +	DEPT_PAGE_REGFILE_CACHE,	/* regular file page cache */
> +	DEPT_PAGE_BDEV_CACHE,		/* block device cache */
> +	DEPT_PAGE_USAGE_NR,		/* nr of usages options */
> +};
> +
> +#define DEPT_PAGE_USAGE_SHIFT 16
> +#define DEPT_PAGE_USAGE_MASK ((1U << DEPT_PAGE_USAGE_SHIFT) - 1)
> +#define DEPT_PAGE_USAGE_PENDING_MASK (DEPT_PAGE_USAGE_MASK << DEPT_PAGE_USAGE_SHIFT)
> +
> +/*
> + * Identify each page's usage type
> + */
> +struct dept_page_usage {
> +	/*
> +	 * low 16 bits  : the current usage type
> +	 * high 16 bits : usage type requested to be set
> +	 *
> +	 * Do not apply the type requested immediately but defer until
> +	 * after clearing PG_locked bit of the folio or page e.g. by
> +	 * folio_unlock().
> +	 */
> +	atomic_t type; /* Update and read atomically */
> +};
> +
>  struct dept_event_site {
>  	/*
>  	 * event site name
> @@ -562,6 +588,7 @@ extern void dept_hardirqs_off(void);
>  struct dept_key { };
>  struct dept_map { };
>  struct dept_ext_wgen { };
> +struct dept_page_usage { };
>  struct dept_event_site { };
>  
>  #define DEPT_MAP_INITIALIZER(n, k) { }
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 5ebc565309af..8ccbb030500c 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -224,6 +224,7 @@ struct page {
>  	struct page *kmsan_shadow;
>  	struct page *kmsan_origin;
>  #endif
> +	struct dept_page_usage usage;
>  	struct dept_ext_wgen pg_locked_wgen;
>  } _struct_page_alignment;
>  
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index d3c4954c4218..3fd3660ddc6f 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -204,6 +204,68 @@ enum pageflags {
>  
>  extern struct dept_map pg_locked_map;
>  
> +static inline int dept_set_page_usage(struct page *p,
> +		unsigned int new_type)
> +{
> +	unsigned int type = atomic_read(&p->usage.type);
> +
> +	if (WARN_ON_ONCE(new_type >= DEPT_PAGE_USAGE_NR))
> +		return -1;
> +
> +	new_type <<= DEPT_PAGE_USAGE_SHIFT;
> +retry:
> +	new_type &= ~DEPT_PAGE_USAGE_MASK;
> +	new_type |= type & DEPT_PAGE_USAGE_MASK;
> +
> +	if (!atomic_try_cmpxchg(&p->usage.type, &type, new_type))
> +		goto retry;
> +
> +	return 0;
> +}
> +
> +static inline int dept_reset_page_usage(struct page *p)
> +{
> +	return dept_set_page_usage(p, DEPT_PAGE_DEFAULT);
> +}
> +
> +static inline void dept_update_page_usage(struct page *p)
> +{
> +	unsigned int type = atomic_read(&p->usage.type);
> +	unsigned int new_type;
> +
> +retry:
> +	new_type = type & DEPT_PAGE_USAGE_PENDING_MASK;
> +	new_type >>= DEPT_PAGE_USAGE_SHIFT;
> +	new_type |= type & DEPT_PAGE_USAGE_PENDING_MASK;
> +
> +	/*
> +	 * Already updated by others.
> +	 */
> +	if (type == new_type)
> +		return;
> +
> +	if (!atomic_try_cmpxchg(&p->usage.type, &type, new_type))
> +		goto retry;
> +}
> +
> +static inline unsigned long dept_event_flags(struct page *p, bool wait)
> +{
> +	unsigned int type;
> +
> +	type = atomic_read(&p->usage.type) & DEPT_PAGE_USAGE_MASK;
> +
> +	if (WARN_ON_ONCE(type >= DEPT_PAGE_USAGE_NR))
> +		return 0;
> +
> +	/*
> +	 * event
> +	 */
> +	if (!wait)
> +		return 1UL << type;
> +
> +	return (1UL << DEPT_PAGE_DEFAULT) | (1UL << type);
> +}
> +
>  /*
>   * Place the following annotations in its suitable point in code:
>   *
> @@ -214,20 +276,28 @@ extern struct dept_map pg_locked_map;
>  
>  static inline void dept_page_set_bit(struct page *p, int bit_nr)
>  {
> +	dept_update_page_usage(p);
>  	if (bit_nr == PG_locked)
>  		dept_request_event(&pg_locked_map, &p->pg_locked_wgen);
>  }
>  
>  static inline void dept_page_clear_bit(struct page *p, int bit_nr)
>  {
> +	unsigned long evt_f;
> +
> +	evt_f = dept_event_flags(p, false);
>  	if (bit_nr == PG_locked)
> -		dept_event(&pg_locked_map, 1UL, _RET_IP_, __func__, &p->pg_locked_wgen);
> +		dept_event(&pg_locked_map, evt_f, _RET_IP_, __func__, &p->pg_locked_wgen);
>  }
>  
>  static inline void dept_page_wait_on_bit(struct page *p, int bit_nr)
>  {
> +	unsigned long evt_f;
> +
> +	dept_update_page_usage(p);
> +	evt_f = dept_event_flags(p, true);
>  	if (bit_nr == PG_locked)
> -		dept_wait(&pg_locked_map, 1UL, _RET_IP_, __func__, 0, -1L);
> +		dept_wait(&pg_locked_map, evt_f, _RET_IP_, __func__, 0, -1L);
>  }
>  
>  static inline void dept_folio_set_bit(struct folio *f, int bit_nr)
> @@ -245,6 +315,8 @@ static inline void dept_folio_wait_on_bit(struct folio *f, int bit_nr)
>  	dept_page_wait_on_bit(&f->page, bit_nr);
>  }
>  #else
> +#define dept_set_page_usage(p, t)		do { } while (0)
> +#define dept_reset_page_usage(p)		do { } while (0)
>  #define dept_page_set_bit(p, bit_nr)		do { } while (0)
>  #define dept_page_clear_bit(p, bit_nr)		do { } while (0)
>  #define dept_page_wait_on_bit(p, bit_nr)	do { } while (0)
> -- 
> 2.17.1
Re: [PATCH v17 44/47] dept: introduce APIs to set page usage and use subclasses_evt for the usage
Posted by Matthew Wilcox 4 weeks ago
On Wed, Nov 19, 2025 at 07:53:12PM +0900, Byungchul Park wrote:
> On Thu, Oct 02, 2025 at 05:12:44PM +0900, Byungchul Park wrote:
> > False positive reports have been observed since dept works with the
> > assumption that all the pages have the same dept class, but the class
> > should be split since the problematic call paths are different depending
> > on what the page is used for.
> > 
> > At least, ones in block device's address_space and ones in regular
> > file's address_space have exclusively different usages.
> > 
> > Thus, define usage candidates like:
> > 
> >    DEPT_PAGE_REGFILE_CACHE /* page in regular file's address_space */
> >    DEPT_PAGE_BDEV_CACHE    /* page in block device's address_space */
> >    DEPT_PAGE_DEFAULT       /* the others */
> 
> 1. I'd like to annotate a page to DEPT_PAGE_REGFILE_CACHE when the page
>    starts to be associated with a page cache for fs data.
> 
> 2. And I'd like to annotate a page to DEPT_PAGE_BDEV_CACHE when the page
>    starts to be associated with meta data of fs e.g. super block.
> 
> 3. Lastly, I'd like to reset the annotated value if any, that has been
>    set in the page, when the page ends the assoication with either page
>    cache or meta block of fs e.g. freeing the page.
> 
> Can anyone suggest good places in code for the annotation 1, 2, 3?  It'd
> be totally appreciated. :-)

I don't think it makes sense to track lock state in the page (nor
folio).  Partly bcause there's just so many of them, but also because
the locking rules don't really apply to individual folios so much as
they do to the mappings (or anon_vmas) that contain folios.

If you're looking to find deadlock scenarios, I think it makes more
sense to track all folio locks in a given mapping as the same lock
type rather than track each folio's lock status.

For example, let's suppose we did something like this in the
page fault path:

Look up and lock a folio (we need folios locked to insert them into
the page tables to avoid a race with truncate)
Try to allocate a page table
Go into reclaim, attempt to reclaim a folio from this mapping

We ought to detect that as a potential deadlock, regardless of which
folio in the mapping we attempt to reclaim.  So can we track folio
locking at the mapping/anon_vma level instead?

---

My current understanding of folio locking rules:

If you hold a lock on folio A, you can take a lock on folio B if:

1. A->mapping == B->mapping and A->index < B->index
   (for example writeback; we take locks on all folios to be written
    back in order)
2. !S_ISBLK(A->mapping->host) and S_ISBLK(B->mapping->host)
3. S_ISREG(A->mapping->host) and S_ISREG(B->mapping->host) with
   inode_lock() held on both and A->index < B->index
   (the remap_range code)
Re: [PATCH v17 44/47] dept: introduce APIs to set page usage and use subclasses_evt for the usage
Posted by Byungchul Park 2 weeks, 2 days ago
On Wed, Nov 19, 2025 at 02:37:17PM +0000, Matthew Wilcox wrote:
> On Wed, Nov 19, 2025 at 07:53:12PM +0900, Byungchul Park wrote:
> > On Thu, Oct 02, 2025 at 05:12:44PM +0900, Byungchul Park wrote:
> > > False positive reports have been observed since dept works with the
> > > assumption that all the pages have the same dept class, but the class
> > > should be split since the problematic call paths are different depending
> > > on what the page is used for.
> > >
> > > At least, ones in block device's address_space and ones in regular
> > > file's address_space have exclusively different usages.
> > >
> > > Thus, define usage candidates like:
> > >
> > >    DEPT_PAGE_REGFILE_CACHE /* page in regular file's address_space */
> > >    DEPT_PAGE_BDEV_CACHE    /* page in block device's address_space */
> > >    DEPT_PAGE_DEFAULT       /* the others */
> >
> > 1. I'd like to annotate a page to DEPT_PAGE_REGFILE_CACHE when the page
> >    starts to be associated with a page cache for fs data.
> >
> > 2. And I'd like to annotate a page to DEPT_PAGE_BDEV_CACHE when the page
> >    starts to be associated with meta data of fs e.g. super block.
> >
> > 3. Lastly, I'd like to reset the annotated value if any, that has been
> >    set in the page, when the page ends the assoication with either page
> >    cache or meta block of fs e.g. freeing the page.
> >
> > Can anyone suggest good places in code for the annotation 1, 2, 3?  It'd
> > be totally appreciated. :-)
> 
> I don't think it makes sense to track lock state in the page (nor
> folio).  Partly bcause there's just so many of them, but also because
> the locking rules don't really apply to individual folios so much as
> they do to the mappings (or anon_vmas) that contain folios.

I've been trying to fully understand what you meant but maybe failed.

FWIW, dept is working based on classification, not instance by instance,
that is similar to lockdep.  This patch is for resolving issues that
might come from the fact that there is a **single class** for PG_locked,
by splitting the class to several ones according to their usages.

> If you're looking to find deadlock scenarios, I think it makes more
> sense to track all folio locks in a given mapping as the same lock
> type rather than track each folio's lock status.
> 
> For example, let's suppose we did something like this in the
> page fault path:
> 
> Look up and lock a folio (we need folios locked to insert them into
> the page tables to avoid a race with truncate)
> Try to allocate a page table
> Go into reclaim, attempt to reclaim a folio from this mapping

I think you are talking about nested lock patterns involving PG_locked.

Even though dept can do much more jobs than just tracking nested lock
patterns within a single context, of course, nested lock patterns
involving PG_locked should be handled appropriately, maybe with the
useful information you gave.  When I work on handling nested locks esp.
involving PG_locked, I will try to get you again.  Thanks.

However, I have no choice but to keep this approach for the **single
class** issue.  Feel free to ask if any.

	Byungchul

> We ought to detect that as a potential deadlock, regardless of which
> folio in the mapping we attempt to reclaim.  So can we track folio
> locking at the mapping/anon_vma level instead?
> 
> ---
> 
> My current understanding of folio locking rules:
> 
> If you hold a lock on folio A, you can take a lock on folio B if:
> 
> 1. A->mapping == B->mapping and A->index < B->index
>    (for example writeback; we take locks on all folios to be written
>     back in order)
> 2. !S_ISBLK(A->mapping->host) and S_ISBLK(B->mapping->host)
> 3. S_ISREG(A->mapping->host) and S_ISREG(B->mapping->host) with
>    inode_lock() held on both and A->index < B->index
>    (the remap_range code)
Re: [PATCH v17 44/47] dept: introduce APIs to set page usage and use subclasses_evt for the usage
Posted by Byungchul Park 3 weeks, 6 days ago
On Wed, Nov 19, 2025 at 02:37:17PM +0000, Matthew Wilcox wrote:
> On Wed, Nov 19, 2025 at 07:53:12PM +0900, Byungchul Park wrote:
> > On Thu, Oct 02, 2025 at 05:12:44PM +0900, Byungchul Park wrote:
> > > False positive reports have been observed since dept works with the
> > > assumption that all the pages have the same dept class, but the class
> > > should be split since the problematic call paths are different depending
> > > on what the page is used for.
> > >
> > > At least, ones in block device's address_space and ones in regular
> > > file's address_space have exclusively different usages.
> > >
> > > Thus, define usage candidates like:
> > >
> > >    DEPT_PAGE_REGFILE_CACHE /* page in regular file's address_space */
> > >    DEPT_PAGE_BDEV_CACHE    /* page in block device's address_space */
> > >    DEPT_PAGE_DEFAULT       /* the others */
> >
> > 1. I'd like to annotate a page to DEPT_PAGE_REGFILE_CACHE when the page
> >    starts to be associated with a page cache for fs data.
> >
> > 2. And I'd like to annotate a page to DEPT_PAGE_BDEV_CACHE when the page
> >    starts to be associated with meta data of fs e.g. super block.
> >
> > 3. Lastly, I'd like to reset the annotated value if any, that has been
> >    set in the page, when the page ends the assoication with either page
> >    cache or meta block of fs e.g. freeing the page.
> >
> > Can anyone suggest good places in code for the annotation 1, 2, 3?  It'd
> > be totally appreciated. :-)
> 
> I don't think it makes sense to track lock state in the page (nor
> folio).  Partly bcause there's just so many of them, but also because
> the locking rules don't really apply to individual folios so much as
> they do to the mappings (or anon_vmas) that contain folios.

Thank you for the suggestion!

Since two folios associated to different mappings might appear in the
same callpath that usually be classified to a single class, I need to
think how to reflect the suggestion.

I guess you wanted to tell me a folio can only be associated to a single
mapping at once.  Right?  If so, sure, I should reflect it.

> If you're looking to find deadlock scenarios, I think it makes more
> sense to track all folio locks in a given mapping as the same lock
> type rather than track each folio's lock status.
> 
> For example, let's suppose we did something like this in the
> page fault path:
> 
> Look up and lock a folio (we need folios locked to insert them into
> the page tables to avoid a race with truncate)
> Try to allocate a page table
> Go into reclaim, attempt to reclaim a folio from this mapping
> 
> We ought to detect that as a potential deadlock, regardless of which
> folio in the mapping we attempt to reclaim.  So can we track folio

Did you mean 'regardless' for 'potential' detection, right?

> locking at the mapping/anon_vma level instead?

Piece of cake.  Even though it may increase the number of DEPT classes,
I hope it will be okay.  I just need to know the points in code where
folios start/end being associated to their specific mappings.

	Byungchul

> ---
> 
> My current understanding of folio locking rules:
> 
> If you hold a lock on folio A, you can take a lock on folio B if:
> 
> 1. A->mapping == B->mapping and A->index < B->index
>    (for example writeback; we take locks on all folios to be written
>     back in order)
> 2. !S_ISBLK(A->mapping->host) and S_ISBLK(B->mapping->host)
> 3. S_ISREG(A->mapping->host) and S_ISREG(B->mapping->host) with
>    inode_lock() held on both and A->index < B->index
>    (the remap_range code)
Re: [PATCH v17 44/47] dept: introduce APIs to set page usage and use subclasses_evt for the usage
Posted by Byungchul Park 3 weeks, 6 days ago
On Thu, Nov 20, 2025 at 11:09:09AM +0900, Byungchul Park wrote:
> On Wed, Nov 19, 2025 at 02:37:17PM +0000, Matthew Wilcox wrote:
> > On Wed, Nov 19, 2025 at 07:53:12PM +0900, Byungchul Park wrote:
> > > On Thu, Oct 02, 2025 at 05:12:44PM +0900, Byungchul Park wrote:
> > > > False positive reports have been observed since dept works with the
> > > > assumption that all the pages have the same dept class, but the class
> > > > should be split since the problematic call paths are different depending
> > > > on what the page is used for.
> > > >
> > > > At least, ones in block device's address_space and ones in regular
> > > > file's address_space have exclusively different usages.
> > > >
> > > > Thus, define usage candidates like:
> > > >
> > > >    DEPT_PAGE_REGFILE_CACHE /* page in regular file's address_space */
> > > >    DEPT_PAGE_BDEV_CACHE    /* page in block device's address_space */
> > > >    DEPT_PAGE_DEFAULT       /* the others */
> > >
> > > 1. I'd like to annotate a page to DEPT_PAGE_REGFILE_CACHE when the page
> > >    starts to be associated with a page cache for fs data.
> > >
> > > 2. And I'd like to annotate a page to DEPT_PAGE_BDEV_CACHE when the page
> > >    starts to be associated with meta data of fs e.g. super block.
> > >
> > > 3. Lastly, I'd like to reset the annotated value if any, that has been
> > >    set in the page, when the page ends the assoication with either page
> > >    cache or meta block of fs e.g. freeing the page.
> > >
> > > Can anyone suggest good places in code for the annotation 1, 2, 3?  It'd
> > > be totally appreciated. :-)
> > 
> > I don't think it makes sense to track lock state in the page (nor
> > folio).  Partly bcause there's just so many of them, but also because
> > the locking rules don't really apply to individual folios so much as
> > they do to the mappings (or anon_vmas) that contain folios.
> 
> Thank you for the suggestion!
> 
> Since two folios associated to different mappings might appear in the
> same callpath that usually be classified to a single class, I need to
> think how to reflect the suggestion.
> 
> I guess you wanted to tell me a folio can only be associated to a single
> mapping at once.  Right?  If so, sure, I should reflect it.
> 
> > If you're looking to find deadlock scenarios, I think it makes more
> > sense to track all folio locks in a given mapping as the same lock
> > type rather than track each folio's lock status.
> > 
> > For example, let's suppose we did something like this in the
> > page fault path:
> > 
> > Look up and lock a folio (we need folios locked to insert them into
> > the page tables to avoid a race with truncate)
> > Try to allocate a page table
> > Go into reclaim, attempt to reclaim a folio from this mapping
> > 
> > We ought to detect that as a potential deadlock, regardless of which
> > folio in the mapping we attempt to reclaim.  So can we track folio
> 
> Did you mean 'regardless' for 'potential' detection, right?
> 
> > locking at the mapping/anon_vma level instead?
> 
> Piece of cake.  Even though it may increase the number of DEPT classes,

Might be not as easy as I thought it'd be.  I need to think it more..

	Byungchul

> I hope it will be okay.  I just need to know the points in code where
> folios start/end being associated to their specific mappings.
> 
> 	Byungchul
> 
> > ---
> > 
> > My current understanding of folio locking rules:
> > 
> > If you hold a lock on folio A, you can take a lock on folio B if:
> > 
> > 1. A->mapping == B->mapping and A->index < B->index
> >    (for example writeback; we take locks on all folios to be written
> >     back in order)
> > 2. !S_ISBLK(A->mapping->host) and S_ISBLK(B->mapping->host)
> > 3. S_ISREG(A->mapping->host) and S_ISREG(B->mapping->host) with
> >    inode_lock() held on both and A->index < B->index
> >    (the remap_range code)
Re: [PATCH v17 44/47] dept: introduce APIs to set page usage and use subclasses_evt for the usage
Posted by Byungchul Park 3 weeks, 6 days ago
On Thu, Nov 20, 2025 at 11:09:09AM +0900, Byungchul Park wrote:
> On Wed, Nov 19, 2025 at 02:37:17PM +0000, Matthew Wilcox wrote:
> > On Wed, Nov 19, 2025 at 07:53:12PM +0900, Byungchul Park wrote:
> > > On Thu, Oct 02, 2025 at 05:12:44PM +0900, Byungchul Park wrote:
> > > > False positive reports have been observed since dept works with the
> > > > assumption that all the pages have the same dept class, but the class
> > > > should be split since the problematic call paths are different depending
> > > > on what the page is used for.
> > > >
> > > > At least, ones in block device's address_space and ones in regular
> > > > file's address_space have exclusively different usages.
> > > >
> > > > Thus, define usage candidates like:
> > > >
> > > >    DEPT_PAGE_REGFILE_CACHE /* page in regular file's address_space */
> > > >    DEPT_PAGE_BDEV_CACHE    /* page in block device's address_space */
> > > >    DEPT_PAGE_DEFAULT       /* the others */
> > >
> > > 1. I'd like to annotate a page to DEPT_PAGE_REGFILE_CACHE when the page
> > >    starts to be associated with a page cache for fs data.
> > >
> > > 2. And I'd like to annotate a page to DEPT_PAGE_BDEV_CACHE when the page
> > >    starts to be associated with meta data of fs e.g. super block.
> > >
> > > 3. Lastly, I'd like to reset the annotated value if any, that has been
> > >    set in the page, when the page ends the assoication with either page
> > >    cache or meta block of fs e.g. freeing the page.
> > >
> > > Can anyone suggest good places in code for the annotation 1, 2, 3?  It'd
> > > be totally appreciated. :-)
> > 
> > I don't think it makes sense to track lock state in the page (nor
> > folio).  Partly bcause there's just so many of them, but also because
> > the locking rules don't really apply to individual folios so much as
> > they do to the mappings (or anon_vmas) that contain folios.
> 
> Thank you for the suggestion!
> 
> Since two folios associated to different mappings might appear in the
> same callpath that usually be classified to a single class, I need to
> think how to reflect the suggestion.
> 
> I guess you wanted to tell me a folio can only be associated to a single
> mapping at once.  Right?  If so, sure, I should reflect it.
> 
> > If you're looking to find deadlock scenarios, I think it makes more
> > sense to track all folio locks in a given mapping as the same lock
> > type rather than track each folio's lock status.
> > 
> > For example, let's suppose we did something like this in the
> > page fault path:
> > 
> > Look up and lock a folio (we need folios locked to insert them into
> > the page tables to avoid a race with truncate)
> > Try to allocate a page table
> > Go into reclaim, attempt to reclaim a folio from this mapping
> > 
> > We ought to detect that as a potential deadlock, regardless of which
> > folio in the mapping we attempt to reclaim.  So can we track folio
> 
> Did you mean 'regardless' for 'potential' detection, right?
> 
> > locking at the mapping/anon_vma level instead?
> 
> Piece of cake.  Even though it may increase the number of DEPT classes,
> I hope it will be okay.  I just need to know the points in code where
> folios start/end being associated to their specific mappings.

Assuming that I understand what you meant correctly, I can use the
@mapping value in struct page as a second key in DEPT.  Of course, it
doesn't guarantee unique ids of the mappings for ever.  However, I think
it can be a good and quite simple start.

	Byungchul

> 	Byungchul
> 
> > ---
> > 
> > My current understanding of folio locking rules:
> > 
> > If you hold a lock on folio A, you can take a lock on folio B if:
> > 
> > 1. A->mapping == B->mapping and A->index < B->index
> >    (for example writeback; we take locks on all folios to be written
> >     back in order)
> > 2. !S_ISBLK(A->mapping->host) and S_ISBLK(B->mapping->host)
> > 3. S_ISREG(A->mapping->host) and S_ISREG(B->mapping->host) with
> >    inode_lock() held on both and A->index < B->index
> >    (the remap_range code)