As there are more ALLOC_ flags that affect reserves, define what flags
affect reserves and clarify the effect of each flag.
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
mm/internal.h | 3 +++
mm/page_alloc.c | 36 +++++++++++++++++++++++-------------
2 files changed, 26 insertions(+), 13 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 9a9d9b5ee87f..370500718732 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -757,6 +757,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
#define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */
#define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+/* Flags that allow allocations below the min watermark. */
+#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
+
enum ttu_flags;
struct tlbflush_unmap_batch;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e2b65767dda0..85a87d0ac57a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3944,15 +3944,14 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
static inline long __zone_watermark_unusable_free(struct zone *z,
unsigned int order, unsigned int alloc_flags)
{
- const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
long unusable_free = (1 << order) - 1;
/*
- * If the caller does not have rights to ALLOC_HARDER then subtract
- * the high-atomic reserves. This will over-estimate the size of the
- * atomic reserve but it avoids a search.
+ * If the caller does not have rights to reserves below the min
+ * watermark then subtract the high-atomic reserves. This will
+ * over-estimate the size of the atomic reserve but it avoids a search.
*/
- if (likely(!alloc_harder))
+ if (likely(!(alloc_flags & ALLOC_RESERVES)))
unusable_free += z->nr_reserved_highatomic;
#ifdef CONFIG_CMA
@@ -3976,25 +3975,36 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
{
long min = mark;
int o;
- const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
/* free_pages may go negative - that's OK */
free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
- if (alloc_flags & ALLOC_MIN_RESERVE)
- min -= min / 2;
+ if (alloc_flags & ALLOC_RESERVES) {
+ /*
+ * __GFP_HIGH allows access to 50% of the min reserve as well
+ * as OOM.
+ */
+ if (alloc_flags & ALLOC_MIN_RESERVE)
+ min -= min / 2;
- if (unlikely(alloc_harder)) {
/*
- * OOM victims can try even harder than normal ALLOC_HARDER
+ * Non-blocking allocations can access some of the reserve
+ * with more access if also __GFP_HIGH. The reasoning is that
+ * a non-blocking caller may incur a more severe penalty
+ * if it cannot get memory quickly, particularly if it's
+ * also __GFP_HIGH.
+ */
+ if (alloc_flags & (ALLOC_HARDER|ALLOC_HIGHATOMIC))
+ min -= min / 4;
+
+ /*
+ * OOM victims can try even harder than the normal reserve
* users on the grounds that it's definitely going to be in
* the exit path shortly and free memory. Any allocation it
* makes during the free path will be small and short-lived.
*/
if (alloc_flags & ALLOC_OOM)
min -= min / 2;
- else
- min -= min / 4;
}
/*
@@ -5293,7 +5303,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* could deplete whole memory reserves which would just make
* the situation worse
*/
- page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
+ page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE|ALLOC_HARDER, ac);
if (page)
goto got_pg;
--
2.35.3
On 11/29/22 16:16, Mel Gorman wrote: > As there are more ALLOC_ flags that affect reserves, define what flags > affect reserves and clarify the effect of each flag. Seems to me this does more than a clarification, but also some functional tweaks, so it could be helpful if those were spelled out in the changelog. > Signed-off-by: Mel Gorman <mgorman@techsingularity.net> > --- > mm/internal.h | 3 +++ > mm/page_alloc.c | 36 +++++++++++++++++++++++------------- > 2 files changed, 26 insertions(+), 13 deletions(-) > > diff --git a/mm/internal.h b/mm/internal.h > index 9a9d9b5ee87f..370500718732 100644 > --- a/mm/internal.h > +++ b/mm/internal.h > @@ -757,6 +757,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, > #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ > #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ > > +/* Flags that allow allocations below the min watermark. */ > +#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) > + > enum ttu_flags; > struct tlbflush_unmap_batch; > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index e2b65767dda0..85a87d0ac57a 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -3944,15 +3944,14 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE); > static inline long __zone_watermark_unusable_free(struct zone *z, > unsigned int order, unsigned int alloc_flags) > { > - const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM)); > long unusable_free = (1 << order) - 1; > > /* > - * If the caller does not have rights to ALLOC_HARDER then subtract > - * the high-atomic reserves. This will over-estimate the size of the > - * atomic reserve but it avoids a search. > + * If the caller does not have rights to reserves below the min > + * watermark then subtract the high-atomic reserves. This will > + * over-estimate the size of the atomic reserve but it avoids a search. 
> */ > - if (likely(!alloc_harder)) > + if (likely(!(alloc_flags & ALLOC_RESERVES))) > unusable_free += z->nr_reserved_highatomic; > > #ifdef CONFIG_CMA > @@ -3976,25 +3975,36 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, > { > long min = mark; > int o; > - const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM)); > > /* free_pages may go negative - that's OK */ > free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); > > - if (alloc_flags & ALLOC_MIN_RESERVE) > - min -= min / 2; > + if (alloc_flags & ALLOC_RESERVES) { Do we want to keep this unlikely() as alloc_harder did before? > + /* > + * __GFP_HIGH allows access to 50% of the min reserve as well > + * as OOM. > + */ > + if (alloc_flags & ALLOC_MIN_RESERVE) > + min -= min / 2; > > - if (unlikely(alloc_harder)) { > /* > - * OOM victims can try even harder than normal ALLOC_HARDER > + * Non-blocking allocations can access some of the reserve > + * with more access if also __GFP_HIGH. The reasoning is that > + * a non-blocking caller may incur a more severe penalty > + * if it cannot get memory quickly, particularly if it's > + * also __GFP_HIGH. > + */ > + if (alloc_flags & (ALLOC_HARDER|ALLOC_HIGHATOMIC)) > + min -= min / 4; For example this seems to change the allowed dip to reserves for ALLOC_HIGHATOMIC. > + > + /* > + * OOM victims can try even harder than the normal reserve > * users on the grounds that it's definitely going to be in > * the exit path shortly and free memory. Any allocation it > * makes during the free path will be small and short-lived. 
> */ > if (alloc_flags & ALLOC_OOM) > min -= min / 2; > - else > - min -= min / 4; > } (noted that this patch doesn't seem to change the concern I raised in previous patch) > /* > @@ -5293,7 +5303,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, > * could deplete whole memory reserves which would just make > * the situation worse > */ > - page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); > + page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE|ALLOC_HARDER, ac); And this AFAICS seems to give __GFP_NOFAIL 3/4 of min reserves instead of 1/4, which seems like a significant change (but hopefully ok) so worth noting at least. > if (page) > goto got_pg; >
On Thu, Dec 08, 2022 at 06:55:00PM +0100, Vlastimil Babka wrote: > On 11/29/22 16:16, Mel Gorman wrote: > > As there are more ALLOC_ flags that affect reserves, define what flags > > affect reserves and clarify the effect of each flag. > > Seems to me this does more than a clarification, but also some functional > tweaks, so it could be helpful if those were spelled out in the changelog. > I will take out the problematic parts that need clarification. There are two, one I'll drop and the other will be split. More details below. > > @@ -3976,25 +3975,36 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, > > { > > long min = mark; > > int o; > > - const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM)); > > > > /* free_pages may go negative - that's OK */ > > free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); > > > > - if (alloc_flags & ALLOC_MIN_RESERVE) > > - min -= min / 2; > > + if (alloc_flags & ALLOC_RESERVES) { > > Do we want to keep this unlikely() as alloc_harder did before? > Added back in. > > + /* > > + * __GFP_HIGH allows access to 50% of the min reserve as well > > + * as OOM. > > + */ > > + if (alloc_flags & ALLOC_MIN_RESERVE) > > + min -= min / 2; > > > > - if (unlikely(alloc_harder)) { > > /* > > - * OOM victims can try even harder than normal ALLOC_HARDER > > + * Non-blocking allocations can access some of the reserve > > + * with more access if also __GFP_HIGH. The reasoning is that > > + * a non-blocking caller may incur a more severe penalty > > + * if it cannot get memory quickly, particularly if it's > > + * also __GFP_HIGH. > > + */ > > + if (alloc_flags & (ALLOC_HARDER|ALLOC_HIGHATOMIC)) > > + min -= min / 4; > > For example this seems to change the allowed dip to reserves for > ALLOC_HIGHATOMIC. > You're right and this could cause problems. 
If high-order atomic allocation failures start appearing again, this change would help but it should be a standalone patch in response to a bug. I'll drop it for now. > > + > > + /* > > + * OOM victims can try even harder than the normal reserve > > * users on the grounds that it's definitely going to be in > > * the exit path shortly and free memory. Any allocation it > > * makes during the free path will be small and short-lived. > > */ > > if (alloc_flags & ALLOC_OOM) > > min -= min / 2; > > - else > > - min -= min / 4; > > } > > (noted that this patch doesn't seem to change the concern I raised in > previous patch) > This might be addressed now with the changes to the patch that caused you concerns about OOM handling. > > /* > > @@ -5293,7 +5303,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, > > * could deplete whole memory reserves which would just make > > * the situation worse > > */ > > - page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); > > + page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE|ALLOC_HARDER, ac); > > And this AFAICS seems to give __GFP_NOFAIL 3/4 of min reserves instead of > 1/4, which seems like a significant change (but hopefully ok) so worth > noting at least. > It deserves a standalone patch. Below is the diff I intend to apply to this patch and the standalone patch. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 58e01a31492e..6f41b84a97ac 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3984,7 +3984,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, /* free_pages may go negative - that's OK */ free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); - if (alloc_flags & ALLOC_RESERVES) { + if (unlikely(alloc_flags & ALLOC_RESERVES)) { /* * __GFP_HIGH allows access to 50% of the min reserve as well * as OOM. 
@@ -3999,7 +3999,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, * if it cannot get memory quickly, particularly if it's * also __GFP_HIGH. */ - if (alloc_flags & (ALLOC_HARDER|ALLOC_HIGHATOMIC)) + if (alloc_flags & ALLOC_HARDER) min -= min / 4; /* @@ -5308,7 +5308,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * could deplete whole memory reserves which would just make * the situation worse */ - page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE|ALLOC_HARDER, ac); + page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); if (page) goto got_pg; The patch to allow __GFP_NOFAIL deeper access is this --8<-- mm/page_alloc.c: Allow __GFP_NOFAIL requests deeper access to reserves Currently __GFP_NOFAIL allocations without any other flags can access 25% of the reserves but these requests imply that the system cannot make forward progress until the allocation succeeds. Allow __GFP_NOFAIL access to 75% of the min reserve. Signed-off-by: Mel Gorman <mgorman@techsingularity.net> --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6f41b84a97ac..d2df78f5baa2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5308,7 +5308,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * could deplete whole memory reserves which would just make * the situation worse */ - page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); + page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE|ALLOC_HARDER, ac); if (page) goto got_pg;
© 2016 - 2025 Red Hat, Inc.