When the page fault is not for write and the refault distance is small,
the page will be activated directly. If there are too many such pages in
a file, it means the pages being read in are likely to be reclaimed again
right away. In such a situation read-ahead has no benefit and only wastes
IO, so count the number of such pages and, when the count grows too large,
stop bothering with read-ahead for a while until the count decreases
automatically.

Define 'too large' as 10000, chosen empirically; this solves the problem
and is not affected by the occasional active refault.

Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
include/linux/fs.h | 2 ++
include/linux/pagemap.h | 1 +
mm/filemap.c | 16 ++++++++++++++++
mm/readahead.c | 4 ++++
4 files changed, 23 insertions(+)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5966a704951..f2a1825442f5a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -960,6 +960,7 @@ struct fown_struct {
* the first of these pages is accessed.
* @ra_pages: Maximum size of a readahead request, copied from the bdi.
* @mmap_miss: How many mmap accesses missed in the page cache.
+ * @active_refault: Number of active page refault.
* @prev_pos: The last byte in the most recent read request.
*
* When this structure is passed to ->readahead(), the "most recent"
@@ -971,6 +972,7 @@ struct file_ra_state {
unsigned int async_size;
unsigned int ra_pages;
unsigned int mmap_miss;
+ unsigned int active_refault;
loff_t prev_pos;
};
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d2..da9eaf985dec4 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1256,6 +1256,7 @@ struct readahead_control {
pgoff_t _index;
unsigned int _nr_pages;
unsigned int _batch_count;
+ unsigned int _active_refault;
bool _workingset;
unsigned long _pflags;
};
diff --git a/mm/filemap.c b/mm/filemap.c
index 750e779c23db7..4de80592ab270 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3037,6 +3037,7 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
#ifdef CONFIG_MMU
#define MMAP_LOTSAMISS (100)
+#define ACTIVE_REFAULT_LIMIT (10000)
/*
* lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
* @vmf - the vm_fault for this fault.
@@ -3142,6 +3143,18 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
if (mmap_miss > MMAP_LOTSAMISS)
return fpin;
+ ractl._active_refault = READ_ONCE(ra->active_refault);
+ if (ractl._active_refault)
+ WRITE_ONCE(ra->active_refault, --ractl._active_refault);
+
+ /*
+ * If there are a lot of refault of active pages in this file,
+ * that means the memory reclaim is ongoing. Stop bothering with
+ * read-ahead since it will only waste IO.
+ */
+ if (ractl._active_refault >= ACTIVE_REFAULT_LIMIT)
+ return fpin;
+
/*
* mmap read-around
*/
@@ -3151,6 +3164,9 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
ra->async_size = ra->ra_pages / 4;
ractl._index = ra->start;
page_cache_ra_order(&ractl, ra, 0);
+
+ WRITE_ONCE(ra->active_refault, ractl._active_refault);
+
return fpin;
}
diff --git a/mm/readahead.c b/mm/readahead.c
index cc4abb67eb223..d79bb70a232c4 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -263,6 +263,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
folio_set_readahead(folio);
ractl->_workingset |= folio_test_workingset(folio);
ractl->_nr_pages++;
+ if (unlikely(folio_test_workingset(folio)))
+ ractl->_active_refault++;
+ else if (unlikely(ractl->_active_refault))
+ ractl->_active_refault--;
}
/*
--
2.25.1
Hi Jan, all,

Please take another look at this patch. It may not be the most graceful
approach, but I can't think of any other way to fix the problem except
using the workingset information.

Thanks,
On 2024/2/1 18:08, Liu Shixin wrote:
> When the pagefault is not for write and the refault distance is close,
> the page will be activated directly. If there are too many such pages in
> a file, that means the pages may be reclaimed immediately.
> In such situation, there is no positive effect to read-ahead since it will
> only waste IO. So collect the number of such pages and when the number is
> too large, stop bothering with read-ahead for a while until it decreased
> automatically.
>
> Define 'too large' as 10000 experientially, which can solves the problem
> and does not affect by the occasional active refault.
>
> Signed-off-by: Liu Shixin <liushixin2@huawei.com>
> ---
> include/linux/fs.h | 2 ++
> include/linux/pagemap.h | 1 +
> mm/filemap.c | 16 ++++++++++++++++
> mm/readahead.c | 4 ++++
> 4 files changed, 23 insertions(+)
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index ed5966a704951..f2a1825442f5a 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -960,6 +960,7 @@ struct fown_struct {
> * the first of these pages is accessed.
> * @ra_pages: Maximum size of a readahead request, copied from the bdi.
> * @mmap_miss: How many mmap accesses missed in the page cache.
> + * @active_refault: Number of active page refault.
> * @prev_pos: The last byte in the most recent read request.
> *
> * When this structure is passed to ->readahead(), the "most recent"
> @@ -971,6 +972,7 @@ struct file_ra_state {
> unsigned int async_size;
> unsigned int ra_pages;
> unsigned int mmap_miss;
> + unsigned int active_refault;
> loff_t prev_pos;
> };
>
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 2df35e65557d2..da9eaf985dec4 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -1256,6 +1256,7 @@ struct readahead_control {
> pgoff_t _index;
> unsigned int _nr_pages;
> unsigned int _batch_count;
> + unsigned int _active_refault;
> bool _workingset;
> unsigned long _pflags;
> };
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 750e779c23db7..4de80592ab270 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -3037,6 +3037,7 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
>
> #ifdef CONFIG_MMU
> #define MMAP_LOTSAMISS (100)
> +#define ACTIVE_REFAULT_LIMIT (10000)
> /*
> * lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
> * @vmf - the vm_fault for this fault.
> @@ -3142,6 +3143,18 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
> if (mmap_miss > MMAP_LOTSAMISS)
> return fpin;
>
> + ractl._active_refault = READ_ONCE(ra->active_refault);
> + if (ractl._active_refault)
> + WRITE_ONCE(ra->active_refault, --ractl._active_refault);
> +
> + /*
> + * If there are a lot of refault of active pages in this file,
> + * that means the memory reclaim is ongoing. Stop bothering with
> + * read-ahead since it will only waste IO.
> + */
> + if (ractl._active_refault >= ACTIVE_REFAULT_LIMIT)
> + return fpin;
> +
> /*
> * mmap read-around
> */
> @@ -3151,6 +3164,9 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
> ra->async_size = ra->ra_pages / 4;
> ractl._index = ra->start;
> page_cache_ra_order(&ractl, ra, 0);
> +
> + WRITE_ONCE(ra->active_refault, ractl._active_refault);
> +
> return fpin;
> }
>
> diff --git a/mm/readahead.c b/mm/readahead.c
> index cc4abb67eb223..d79bb70a232c4 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -263,6 +263,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
> folio_set_readahead(folio);
> ractl->_workingset |= folio_test_workingset(folio);
> ractl->_nr_pages++;
> + if (unlikely(folio_test_workingset(folio)))
> + ractl->_active_refault++;
> + else if (unlikely(ractl->_active_refault))
> + ractl->_active_refault--;
> }
>
> /*
On Thu 01-02-24 18:08:35, Liu Shixin wrote:
> When the pagefault is not for write and the refault distance is close,
> the page will be activated directly. If there are too many such pages in
> a file, that means the pages may be reclaimed immediately.
> In such situation, there is no positive effect to read-ahead since it will
> only waste IO. So collect the number of such pages and when the number is
> too large, stop bothering with read-ahead for a while until it decreased
> automatically.
>
> Define 'too large' as 10000 experientially, which can solves the problem
> and does not affect by the occasional active refault.
>
> Signed-off-by: Liu Shixin <liushixin2@huawei.com>
So I'm not convinced this new logic is needed. We already have
ra->mmap_miss, which gets incremented when a page fault has to read the page
(and decremented when a page fault finds the page already in the cache). This
should already work to detect thrashing as well, shouldn't it? If it does
not, why?
Honza
> ---
> include/linux/fs.h | 2 ++
> include/linux/pagemap.h | 1 +
> mm/filemap.c | 16 ++++++++++++++++
> mm/readahead.c | 4 ++++
> 4 files changed, 23 insertions(+)
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index ed5966a704951..f2a1825442f5a 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -960,6 +960,7 @@ struct fown_struct {
> * the first of these pages is accessed.
> * @ra_pages: Maximum size of a readahead request, copied from the bdi.
> * @mmap_miss: How many mmap accesses missed in the page cache.
> + * @active_refault: Number of active page refault.
> * @prev_pos: The last byte in the most recent read request.
> *
> * When this structure is passed to ->readahead(), the "most recent"
> @@ -971,6 +972,7 @@ struct file_ra_state {
> unsigned int async_size;
> unsigned int ra_pages;
> unsigned int mmap_miss;
> + unsigned int active_refault;
> loff_t prev_pos;
> };
>
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 2df35e65557d2..da9eaf985dec4 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -1256,6 +1256,7 @@ struct readahead_control {
> pgoff_t _index;
> unsigned int _nr_pages;
> unsigned int _batch_count;
> + unsigned int _active_refault;
> bool _workingset;
> unsigned long _pflags;
> };
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 750e779c23db7..4de80592ab270 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -3037,6 +3037,7 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
>
> #ifdef CONFIG_MMU
> #define MMAP_LOTSAMISS (100)
> +#define ACTIVE_REFAULT_LIMIT (10000)
> /*
> * lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
> * @vmf - the vm_fault for this fault.
> @@ -3142,6 +3143,18 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
> if (mmap_miss > MMAP_LOTSAMISS)
> return fpin;
>
> + ractl._active_refault = READ_ONCE(ra->active_refault);
> + if (ractl._active_refault)
> + WRITE_ONCE(ra->active_refault, --ractl._active_refault);
> +
> + /*
> + * If there are a lot of refault of active pages in this file,
> + * that means the memory reclaim is ongoing. Stop bothering with
> + * read-ahead since it will only waste IO.
> + */
> + if (ractl._active_refault >= ACTIVE_REFAULT_LIMIT)
> + return fpin;
> +
> /*
> * mmap read-around
> */
> @@ -3151,6 +3164,9 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
> ra->async_size = ra->ra_pages / 4;
> ractl._index = ra->start;
> page_cache_ra_order(&ractl, ra, 0);
> +
> + WRITE_ONCE(ra->active_refault, ractl._active_refault);
> +
> return fpin;
> }
>
> diff --git a/mm/readahead.c b/mm/readahead.c
> index cc4abb67eb223..d79bb70a232c4 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -263,6 +263,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
> folio_set_readahead(folio);
> ractl->_workingset |= folio_test_workingset(folio);
> ractl->_nr_pages++;
> + if (unlikely(folio_test_workingset(folio)))
> + ractl->_active_refault++;
> + else if (unlikely(ractl->_active_refault))
> + ractl->_active_refault--;
> }
>
> /*
> --
> 2.25.1
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
On 2024/2/1 17:37, Jan Kara wrote:
> On Thu 01-02-24 18:08:35, Liu Shixin wrote:
>> When the pagefault is not for write and the refault distance is close,
>> the page will be activated directly. If there are too many such pages in
>> a file, that means the pages may be reclaimed immediately.
>> In such situation, there is no positive effect to read-ahead since it will
>> only waste IO. So collect the number of such pages and when the number is
>> too large, stop bothering with read-ahead for a while until it decreased
>> automatically.
>>
>> Define 'too large' as 10000 experientially, which can solves the problem
>> and does not affect by the occasional active refault.
>>
>> Signed-off-by: Liu Shixin <liushixin2@huawei.com>
> So I'm not convinced this new logic is needed. We already have
> ra->mmap_miss which gets incremented when a page fault has to read the page
> (and decremented when a page fault found the page already in cache). This
> should already work to detect trashing as well, shouldn't it? If it does
> not, why?
>
> Honza
ra->mmap_miss doesn't help: it is incremented only by one in
do_sync_mmap_readahead() and then decremented by one for every page found in
filemap_map_pages(). So in this scenario it can't exceed MMAP_LOTSAMISS.
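
To make the asymmetry concrete, here is a rough user-space model of that
accounting (illustrative only; MMAP_LOTSAMISS matches mm/filemap.c, but the
32-page readahead window and the fault count are assumptions, not the real
kernel values):

#include <stdio.h>

#define MMAP_LOTSAMISS 100

int main(void)
{
	unsigned int mmap_miss = 0, max_miss = 0;
	unsigned int fault, mapped;

	for (fault = 0; fault < 1000; fault++) {
		/* do_sync_mmap_readahead(): one increment per faulting page */
		mmap_miss++;
		if (mmap_miss > max_miss)
			max_miss = mmap_miss;

		/*
		 * filemap_map_pages(): the pages readahead just brought in
		 * are found in the cache, so mmap_miss is decremented once
		 * per mapped page and drops straight back to zero.
		 */
		for (mapped = 0; mapped < 32 && mmap_miss; mapped++)
			mmap_miss--;
	}

	printf("max mmap_miss over 1000 thrashing faults: %u (MMAP_LOTSAMISS is %d)\n",
	       max_miss, MMAP_LOTSAMISS);
	return 0;
}

In this model the counter never rises above 1, so the MMAP_LOTSAMISS check
is never triggered even though every fault had to read the page from disk.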
Thanks,
>
>> ---
>> include/linux/fs.h | 2 ++
>> include/linux/pagemap.h | 1 +
>> mm/filemap.c | 16 ++++++++++++++++
>> mm/readahead.c | 4 ++++
>> 4 files changed, 23 insertions(+)
>>
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index ed5966a704951..f2a1825442f5a 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -960,6 +960,7 @@ struct fown_struct {
>> * the first of these pages is accessed.
>> * @ra_pages: Maximum size of a readahead request, copied from the bdi.
>> * @mmap_miss: How many mmap accesses missed in the page cache.
>> + * @active_refault: Number of active page refault.
>> * @prev_pos: The last byte in the most recent read request.
>> *
>> * When this structure is passed to ->readahead(), the "most recent"
>> @@ -971,6 +972,7 @@ struct file_ra_state {
>> unsigned int async_size;
>> unsigned int ra_pages;
>> unsigned int mmap_miss;
>> + unsigned int active_refault;
>> loff_t prev_pos;
>> };
>>
>> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
>> index 2df35e65557d2..da9eaf985dec4 100644
>> --- a/include/linux/pagemap.h
>> +++ b/include/linux/pagemap.h
>> @@ -1256,6 +1256,7 @@ struct readahead_control {
>> pgoff_t _index;
>> unsigned int _nr_pages;
>> unsigned int _batch_count;
>> + unsigned int _active_refault;
>> bool _workingset;
>> unsigned long _pflags;
>> };
>> diff --git a/mm/filemap.c b/mm/filemap.c
>> index 750e779c23db7..4de80592ab270 100644
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -3037,6 +3037,7 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
>>
>> #ifdef CONFIG_MMU
>> #define MMAP_LOTSAMISS (100)
>> +#define ACTIVE_REFAULT_LIMIT (10000)
>> /*
>> * lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
>> * @vmf - the vm_fault for this fault.
>> @@ -3142,6 +3143,18 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
>> if (mmap_miss > MMAP_LOTSAMISS)
>> return fpin;
>>
>> + ractl._active_refault = READ_ONCE(ra->active_refault);
>> + if (ractl._active_refault)
>> + WRITE_ONCE(ra->active_refault, --ractl._active_refault);
>> +
>> + /*
>> + * If there are a lot of refault of active pages in this file,
>> + * that means the memory reclaim is ongoing. Stop bothering with
>> + * read-ahead since it will only waste IO.
>> + */
>> + if (ractl._active_refault >= ACTIVE_REFAULT_LIMIT)
>> + return fpin;
>> +
>> /*
>> * mmap read-around
>> */
>> @@ -3151,6 +3164,9 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
>> ra->async_size = ra->ra_pages / 4;
>> ractl._index = ra->start;
>> page_cache_ra_order(&ractl, ra, 0);
>> +
>> + WRITE_ONCE(ra->active_refault, ractl._active_refault);
>> +
>> return fpin;
>> }
>>
>> diff --git a/mm/readahead.c b/mm/readahead.c
>> index cc4abb67eb223..d79bb70a232c4 100644
>> --- a/mm/readahead.c
>> +++ b/mm/readahead.c
>> @@ -263,6 +263,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
>> folio_set_readahead(folio);
>> ractl->_workingset |= folio_test_workingset(folio);
>> ractl->_nr_pages++;
>> + if (unlikely(folio_test_workingset(folio)))
>> + ractl->_active_refault++;
>> + else if (unlikely(ractl->_active_refault))
>> + ractl->_active_refault--;
>> }
>>
>> /*
>> --
>> 2.25.1
>>
On Thu 01-02-24 18:41:30, Liu Shixin wrote:
> On 2024/2/1 17:37, Jan Kara wrote:
> > On Thu 01-02-24 18:08:35, Liu Shixin wrote:
> >> When the pagefault is not for write and the refault distance is close,
> >> the page will be activated directly. If there are too many such pages in
> >> a file, that means the pages may be reclaimed immediately.
> >> In such situation, there is no positive effect to read-ahead since it will
> >> only waste IO. So collect the number of such pages and when the number is
> >> too large, stop bothering with read-ahead for a while until it decreased
> >> automatically.
> >>
> >> Define 'too large' as 10000 experientially, which can solves the problem
> >> and does not affect by the occasional active refault.
> >>
> >> Signed-off-by: Liu Shixin <liushixin2@huawei.com>
> > So I'm not convinced this new logic is needed. We already have
> > ra->mmap_miss which gets incremented when a page fault has to read the page
> > (and decremented when a page fault found the page already in cache). This
> > should already work to detect trashing as well, shouldn't it? If it does
> > not, why?
> >
> > Honza
> ra->mmap_miss doesn't help, it increased only one in do_sync_mmap_readahead()
> and then decreased one for every page in filemap_map_pages(). So in this scenario,
> it can't exceed MMAP_LOTSAMISS.

I see, OK. But that's a (longstanding) bug in how mmap_miss is handled. Can
you please test whether attached patches fix the trashing for you? At least
now I can see mmap_miss properly increments when we are hitting uncached
pages... Thanks!

								Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
On 2024/2/2 1:31, Jan Kara wrote:
> On Thu 01-02-24 18:41:30, Liu Shixin wrote:
>> On 2024/2/1 17:37, Jan Kara wrote:
>>> On Thu 01-02-24 18:08:35, Liu Shixin wrote:
>>>> When the pagefault is not for write and the refault distance is close,
>>>> the page will be activated directly. If there are too many such pages in
>>>> a file, that means the pages may be reclaimed immediately.
>>>> In such situation, there is no positive effect to read-ahead since it will
>>>> only waste IO. So collect the number of such pages and when the number is
>>>> too large, stop bothering with read-ahead for a while until it decreased
>>>> automatically.
>>>>
>>>> Define 'too large' as 10000 experientially, which can solves the problem
>>>> and does not affect by the occasional active refault.
>>>>
>>>> Signed-off-by: Liu Shixin <liushixin2@huawei.com>
>>> So I'm not convinced this new logic is needed. We already have
>>> ra->mmap_miss which gets incremented when a page fault has to read the page
>>> (and decremented when a page fault found the page already in cache). This
>>> should already work to detect trashing as well, shouldn't it? If it does
>>> not, why?
>>>
>>> Honza
>> ra->mmap_miss doesn't help, it increased only one in do_sync_mmap_readahead()
>> and then decreased one for every page in filemap_map_pages(). So in this scenario,
>> it can't exceed MMAP_LOTSAMISS.
> I see, OK. But that's a (longstanding) bug in how mmap_miss is handled. Can
> you please test whether attached patches fix the trashing for you? At least
> now I can see mmap_miss properly increments when we are hitting uncached
> pages... Thanks!
>
> Honza
The patch doesn't seem to have much effect. I will try to analyze why it doesn't work.
The attached file is my testcase.
Thanks,
>
#!/bin/bash
while true; do
	# keep one instance of /alloc_page running (presumably built from the
	# C program below) so the system stays under memory pressure
	flag=$(ps -ef | grep -v grep | grep alloc_page | wc -l)
	if [ "$flag" -eq 0 ]; then
		/alloc_page &
	fi
	sleep 30
	# measure how long a yum install/remove cycle takes under that pressure
	start_time=$(date +%s)
	yum install -y expect > /dev/null 2>&1
	end_time=$(date +%s)
	elapsed_time=$((end_time - start_time))
	echo "$elapsed_time seconds"
	yum remove -y expect > /dev/null 2>&1
done

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#define SIZE (1*1024*1024)	/* 1M */

int main()
{
	void *ptr = NULL;
	int i;

	/* Allocate and dirty about 6GB, 1MB at a time, then hold it to keep
	 * the system under memory pressure. */
	for (i = 0; i < 1024 * 6 - 50; i++) {
		ptr = (void *) malloc(SIZE);
		if (ptr == NULL) {
			printf("malloc err!");
			return -1;
		}
		memset(ptr, 0, SIZE);
	}
	sleep(99999);
	free(ptr);
	return 0;
}
On 2024/2/2 17:02, Liu Shixin wrote:
>
> On 2024/2/2 1:31, Jan Kara wrote:
>> On Thu 01-02-24 18:41:30, Liu Shixin wrote:
>>> On 2024/2/1 17:37, Jan Kara wrote:
>>>> On Thu 01-02-24 18:08:35, Liu Shixin wrote:
>>>>> When the pagefault is not for write and the refault distance is close,
>>>>> the page will be activated directly. If there are too many such pages in
>>>>> a file, that means the pages may be reclaimed immediately.
>>>>> In such situation, there is no positive effect to read-ahead since it will
>>>>> only waste IO. So collect the number of such pages and when the number is
>>>>> too large, stop bothering with read-ahead for a while until it decreased
>>>>> automatically.
>>>>>
>>>>> Define 'too large' as 10000 experientially, which can solves the problem
>>>>> and does not affect by the occasional active refault.
>>>>>
>>>>> Signed-off-by: Liu Shixin <liushixin2@huawei.com>
>>>> So I'm not convinced this new logic is needed. We already have
>>>> ra->mmap_miss which gets incremented when a page fault has to read the page
>>>> (and decremented when a page fault found the page already in cache). This
>>>> should already work to detect trashing as well, shouldn't it? If it does
>>>> not, why?
>>>>
>>>> Honza
>>> ra->mmap_miss doesn't help, it increased only one in do_sync_mmap_readahead()
>>> and then decreased one for every page in filemap_map_pages(). So in this scenario,
>>> it can't exceed MMAP_LOTSAMISS.
>> I see, OK. But that's a (longstanding) bug in how mmap_miss is handled. Can
>> you please test whether attached patches fix the trashing for you? At least
>> now I can see mmap_miss properly increments when we are hitting uncached
>> pages... Thanks!
>>
>> Honza
> The patch doesn't seem to have much effect. I will try to analyze why it doesn't work.
> The attached file is my testcase.
>
> Thanks,

I think I figured out why mmap_miss doesn't work. After do_sync_mmap_readahead(),
there is a __filemap_get_folio() to make sure the page is ready. It is then found
in the cache in filemap_map_pages() as well, so mmap_miss is decreased once.
mmap_miss goes back to 0, and can't stop read-ahead.

Overall, I don't think mmap_miss can solve this problem.
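
For comparison, a similarly rough user-space model of the active_refault
counter this patch introduces (illustrative only; it assumes that while the
file is thrashing every folio added by readahead carries the workingset flag,
and that each readahead window is 32 pages):

#include <stdio.h>

#define ACTIVE_REFAULT_LIMIT 10000

int main(void)
{
	unsigned int active_refault = 0;
	unsigned int fault, nr;

	for (fault = 0; fault < 20000; fault++) {
		/* do_sync_mmap_readahead(): decay by one on every fault */
		if (active_refault)
			active_refault--;

		/* once the limit is reached, read-ahead is skipped */
		if (active_refault >= ACTIVE_REFAULT_LIMIT)
			continue;

		/* page_cache_ra_unbounded(): +1 per workingset folio read in */
		for (nr = 0; nr < 32; nr++)
			active_refault++;
	}

	printf("active_refault after 20000 thrashing faults: %u (limit %d)\n",
	       active_refault, ACTIVE_REFAULT_LIMIT);
	return 0;
}

In this model the counter climbs to the limit after a few hundred faults and
then hovers there, so read-ahead is attempted only about once every 32 faults
instead of on every fault.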
On 2024/2/2 1:31, Jan Kara wrote:
> On Thu 01-02-24 18:41:30, Liu Shixin wrote:
>> On 2024/2/1 17:37, Jan Kara wrote:
>>> On Thu 01-02-24 18:08:35, Liu Shixin wrote:
>>>> When the pagefault is not for write and the refault distance is close,
>>>> the page will be activated directly. If there are too many such pages in
>>>> a file, that means the pages may be reclaimed immediately.
>>>> In such situation, there is no positive effect to read-ahead since it will
>>>> only waste IO. So collect the number of such pages and when the number is
>>>> too large, stop bothering with read-ahead for a while until it decreased
>>>> automatically.
>>>>
>>>> Define 'too large' as 10000 experientially, which can solves the problem
>>>> and does not affect by the occasional active refault.
>>>>
>>>> Signed-off-by: Liu Shixin <liushixin2@huawei.com>
>>> So I'm not convinced this new logic is needed. We already have
>>> ra->mmap_miss which gets incremented when a page fault has to read the page
>>> (and decremented when a page fault found the page already in cache). This
>>> should already work to detect trashing as well, shouldn't it? If it does
>>> not, why?
>>>
>>> Honza
>> ra->mmap_miss doesn't help, it increased only one in do_sync_mmap_readahead()
>> and then decreased one for every page in filemap_map_pages(). So in this scenario,
>> it can't exceed MMAP_LOTSAMISS.
> I see, OK. But that's a (longstanding) bug in how mmap_miss is handled. Can
> you please test whether attached patches fix the trashing for you? At least
> now I can see mmap_miss properly increments when we are hitting uncached
> pages... Thanks!
>
> Honza

Thanks for the patch, I will test it.