Make clear_shadow_entry() clear shadow entries in `struct folio_batch`
so that it can reduce contention on i_lock and i_pages locks, e.g.,
watchdog: BUG: soft lockup - CPU#29 stuck for 11s! [fio:2701649]
clear_shadow_entry+0x3d/0x100
mapping_try_invalidate+0x117/0x1d0
invalidate_mapping_pages+0x10/0x20
invalidate_bdev+0x3c/0x50
blkdev_common_ioctl+0x5f7/0xa90
blkdev_ioctl+0x109/0x270
Reported-by: Bharata B Rao <bharata@amd.com>
Closes: https://lore.kernel.org/d2841226-e27b-4d3d-a578-63587a3aa4f3@amd.com/
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
mm/truncate.c | 67 +++++++++++++++++++++++----------------------------
1 file changed, 30 insertions(+), 37 deletions(-)
diff --git a/mm/truncate.c b/mm/truncate.c
index 5ce62a939e55..bc12e7ff7d6a 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -39,12 +39,24 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
xas_store(&xas, NULL);
}
-static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
- void *entry)
+static void clear_shadow_entry(struct address_space *mapping,
+ struct folio_batch *fbatch, pgoff_t *indices)
{
+ int i;
+
+ if (shmem_mapping(mapping) || dax_mapping(mapping))
+ return;
+
spin_lock(&mapping->host->i_lock);
xa_lock_irq(&mapping->i_pages);
- __clear_shadow_entry(mapping, index, entry);
+
+ for (i = 0; i < folio_batch_count(fbatch); i++) {
+ struct folio *folio = fbatch->folios[i];
+
+ if (xa_is_value(folio))
+ __clear_shadow_entry(mapping, indices[i], folio);
+ }
+
xa_unlock_irq(&mapping->i_pages);
if (mapping_shrinkable(mapping))
inode_add_lru(mapping->host);
@@ -105,36 +117,6 @@ static void truncate_folio_batch_exceptionals(struct address_space *mapping,
fbatch->nr = j;
}
-/*
- * Invalidate exceptional entry if easily possible. This handles exceptional
- * entries for invalidate_inode_pages().
- */
-static int invalidate_exceptional_entry(struct address_space *mapping,
- pgoff_t index, void *entry)
-{
- /* Handled by shmem itself, or for DAX we do nothing. */
- if (shmem_mapping(mapping) || dax_mapping(mapping))
- return 1;
- clear_shadow_entry(mapping, index, entry);
- return 1;
-}
-
-/*
- * Invalidate exceptional entry if clean. This handles exceptional entries for
- * invalidate_inode_pages2() so for DAX it evicts only clean entries.
- */
-static int invalidate_exceptional_entry2(struct address_space *mapping,
- pgoff_t index, void *entry)
-{
- /* Handled by shmem itself */
- if (shmem_mapping(mapping))
- return 1;
- if (dax_mapping(mapping))
- return dax_invalidate_mapping_entry_sync(mapping, index);
- clear_shadow_entry(mapping, index, entry);
- return 1;
-}
-
/**
* folio_invalidate - Invalidate part or all of a folio.
* @folio: The folio which is affected.
@@ -494,6 +476,7 @@ unsigned long mapping_try_invalidate(struct address_space *mapping,
unsigned long ret;
unsigned long count = 0;
int i;
+ bool xa_has_values = false;
folio_batch_init(&fbatch);
while (find_lock_entries(mapping, &index, end, &fbatch, indices)) {
@@ -503,8 +486,8 @@ unsigned long mapping_try_invalidate(struct address_space *mapping,
/* We rely upon deletion not changing folio->index */
if (xa_is_value(folio)) {
- count += invalidate_exceptional_entry(mapping,
- indices[i], folio);
+ xa_has_values = true;
+ count++;
continue;
}
@@ -522,6 +505,10 @@ unsigned long mapping_try_invalidate(struct address_space *mapping,
}
count += ret;
}
+
+ if (xa_has_values)
+ clear_shadow_entry(mapping, &fbatch, indices);
+
folio_batch_remove_exceptionals(&fbatch);
folio_batch_release(&fbatch);
cond_resched();
@@ -616,6 +603,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
int ret = 0;
int ret2 = 0;
int did_range_unmap = 0;
+ bool xa_has_values = false;
if (mapping_empty(mapping))
return 0;
@@ -629,8 +617,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
/* We rely upon deletion not changing folio->index */
if (xa_is_value(folio)) {
- if (!invalidate_exceptional_entry2(mapping,
- indices[i], folio))
+ xa_has_values = true;
+ if (dax_mapping(mapping) &&
+ !dax_invalidate_mapping_entry_sync(mapping, indices[i]))
ret = -EBUSY;
continue;
}
@@ -666,6 +655,10 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
ret = ret2;
folio_unlock(folio);
}
+
+ if (xa_has_values)
+ clear_shadow_entry(mapping, &fbatch, indices);
+
folio_batch_remove_exceptionals(&fbatch);
folio_batch_release(&fbatch);
cond_resched();
--
2.45.2.803.g4e1b14247a-goog
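
For review convenience, here is the new helper as it reads with the hunks
applied, condensed from the diff above. The trailing spin_unlock() is
unchanged context that falls below the first hunk; everything else is
taken verbatim from the patch. The point of the change is visible at a
glance: one i_lock/i_pages round trip now covers the whole batch instead
of one per shadow entry.

	static void clear_shadow_entry(struct address_space *mapping,
				       struct folio_batch *fbatch, pgoff_t *indices)
	{
		int i;

		if (shmem_mapping(mapping) || dax_mapping(mapping))
			return;

		spin_lock(&mapping->host->i_lock);
		xa_lock_irq(&mapping->i_pages);

		/* One lock acquisition covers every shadow entry in the batch. */
		for (i = 0; i < folio_batch_count(fbatch); i++) {
			struct folio *folio = fbatch->folios[i];

			if (xa_is_value(folio))
				__clear_shadow_entry(mapping, indices[i], folio);
		}

		xa_unlock_irq(&mapping->i_pages);
		if (mapping_shrinkable(mapping))
			inode_add_lru(mapping->host);
		spin_unlock(&mapping->host->i_lock);
	}
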
On Mon, Jul 08, 2024 at 03:27:53PM -0600, Yu Zhao wrote:
> Make clear_shadow_entry() clear shadow entries in `struct folio_batch`
> so that it can reduce contention on i_lock and i_pages locks, e.g.,
I think it needs to be renamed, perhaps to clear_shadow_entries().
> @@ -503,8 +486,8 @@ unsigned long mapping_try_invalidate(struct address_space *mapping,
> /* We rely upon deletion not changing folio->index */
>
> if (xa_is_value(folio)) {
> - count += invalidate_exceptional_entry(mapping,
> - indices[i], folio);
> + xa_has_values = true;
> + count++;
Mmm. This is awkward. It's supposed to return the number of pages,
not the number of folios (or shadow entries) invalidated.
On Mon, Jul 8, 2024 at 4:36 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Mon, Jul 08, 2024 at 03:27:53PM -0600, Yu Zhao wrote:
> > Make clear_shadow_entry() clear shadow entries in `struct folio_batch`
> > so that it can reduce contention on i_lock and i_pages locks, e.g.,
>
> I think it needs to be renamed, perhaps to clear_shadow_entries().
Yes, thank you.
> > @@ -503,8 +486,8 @@ unsigned long mapping_try_invalidate(struct address_space *mapping,
> > /* We rely upon deletion not changing folio->index */
> >
> > if (xa_is_value(folio)) {
> > - count += invalidate_exceptional_entry(mapping,
> > - indices[i], folio);
> > + xa_has_values = true;
> > + count++;
>
> Mmm. This is awkward. It's supposed to return the number of pages,
> not the number of folios (or shadow entries) invalidated.
I didn't think about this much. It seems to me no callers really care
about it other than some debugging messages?
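
Concretely, the mixed units live in this spot of the
mapping_try_invalidate() hunk: elsewhere in the loop, 'count'
accumulates pages (mapping_evict_folio() reports the number of pages on
success, feeding 'count += ret'), while each shadow entry now
contributes exactly 1, the same value the removed
invalidate_exceptional_entry() returned:

	if (xa_is_value(folio)) {
		xa_has_values = true;
		count++;	/* one per shadow entry, not per page */
		continue;
	}
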
On Mon, 8 Jul 2024 15:27:53 -0600 Yu Zhao <yuzhao@google.com> wrote:
> Make clear_shadow_entry() clear shadow entries in `struct folio_batch`
> so that it can reduce contention on i_lock and i_pages locks, e.g.,
>
> watchdog: BUG: soft lockup - CPU#29 stuck for 11s! [fio:2701649]
> clear_shadow_entry+0x3d/0x100
> mapping_try_invalidate+0x117/0x1d0
> invalidate_mapping_pages+0x10/0x20
> invalidate_bdev+0x3c/0x50
> blkdev_common_ioctl+0x5f7/0xa90
> blkdev_ioctl+0x109/0x270
This will clearly reduce lock traffic a lot, but does it truly fix the
issue? Is it the case that sufficiently extreme loads will still run
into problems?
> --- a/mm/truncate.c
> +++ b/mm/truncate.c
> @@ -39,12 +39,24 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
> xas_store(&xas, NULL);
> }
>
> -static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
> - void *entry)
> +static void clear_shadow_entry(struct address_space *mapping,
> + struct folio_batch *fbatch, pgoff_t *indices)
> {
> + int i;
> +
> + if (shmem_mapping(mapping) || dax_mapping(mapping))
> + return;
We lost the comment which was in invalidate_exceptional_entry() and
elsewhere. It wasn't a terribly good one but I do think a few words
which explain why we're testing for these things would be helpful.
I expect we should backport this. But identifying a Fixes: target
looks to be challenging.
On Mon, Jul 8, 2024 at 4:16 PM Andrew Morton <akpm@linux-foundation.org> wrote:
>
> On Mon, 8 Jul 2024 15:27:53 -0600 Yu Zhao <yuzhao@google.com> wrote:
>
> > Make clear_shadow_entry() clear shadow entries in `struct folio_batch`
> > so that it can reduce contention on i_lock and i_pages locks, e.g.,
> >
> > watchdog: BUG: soft lockup - CPU#29 stuck for 11s! [fio:2701649]
> > clear_shadow_entry+0x3d/0x100
> > mapping_try_invalidate+0x117/0x1d0
> > invalidate_mapping_pages+0x10/0x20
> > invalidate_bdev+0x3c/0x50
> > blkdev_common_ioctl+0x5f7/0xa90
> > blkdev_ioctl+0x109/0x270
>
> This will clearly reduce lock traffic a lot, but does it truly fix the
> issue? Is it the case that sufficiently extreme loads will still run
> into problems?
I think Bharata was running extreme loads. So I'd say it's good enough
for now, considering truncation doesn't happen that often.
> > --- a/mm/truncate.c
> > +++ b/mm/truncate.c
> > @@ -39,12 +39,24 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
> > xas_store(&xas, NULL);
> > }
> >
> > -static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
> > - void *entry)
> > +static void clear_shadow_entry(struct address_space *mapping,
> > + struct folio_batch *fbatch, pgoff_t *indices)
> > {
> > + int i;
> > +
> > + if (shmem_mapping(mapping) || dax_mapping(mapping))
> > + return;
>
> We lost the comment which was in invalidate_exceptional_entry() and
> elsewhere. It wasn't a terribly good one but I do think a few words
> which explain why we're testing for these things would be helpful.
I'll put the original comment back. It seems to me it was stating the
obvious, and I don't really know how to improve it since it's obvious
;)
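
A sketch of that follow-up, reusing the wording that
invalidate_exceptional_entry() carried, at the top of the batched
helper:

	/* Handled by shmem itself, or for DAX we do nothing. */
	if (shmem_mapping(mapping) || dax_mapping(mapping))
		return;
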
> I expect we should backport this. But identifying a Fixes: target
> looks to be challenging.
I wouldn't worry about backporting, nobody else has run into this
scalability issue (not even a day-to-day performance problem).