From nobody Thu Oct 2 14:22:15 2025
Date: Thu, 25 Sep 2025 18:11:09 -0400
From: Rik van Riel
To: Frank van der Linden
Cc: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org,
 linux-kernel@vger.kernel.org, hannes@cmpxchg.org, david@redhat.com,
 roman.gushchin@linux.dev, kernel-team@meta.com
Subject: [RFC PATCH 00/12] mm,cma: call CMA balancing from page reclaim code
Message-ID: <20250925181109.11dd36e5@fangorn>
In-Reply-To: <20250915195153.462039-1-fvdl@google.com>
References: <20250915195153.462039-1-fvdl@google.com>

Call CMA balancing from the page reclaim code, if page reclaim is
reclaiming pages that are unsuitable for the allocator.

To keep direct reclaim latencies low, kswapd will do CMA balancing
whenever some of the reclaimed pages are unsuitable for the allocator
that woke up kswapd, while direct reclaimers will only do CMA balancing
if most of the reclaimed pages are unsuitable.

Signed-off-by: Rik van Riel
---
 mm/vmscan.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index a48aec8bfd92..ec6bde5b07d3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -168,6 +168,9 @@ struct scan_control {
 	/* Number of pages freed so far during a call to shrink_zones() */
 	unsigned long nr_reclaimed;
 
+	/* Number of pages reclaimed, but unsuitable for the allocator */
+	unsigned long nr_unsuitable;
+
 	struct {
 		unsigned int dirty;
 		unsigned int unqueued_dirty;
@@ -1092,6 +1095,19 @@ static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
 	return !data_race(folio_swap_flags(folio) & SWP_FS_OPS);
 }
 
+#ifdef CONFIG_CMA
+static bool unsuitable_folio(struct folio *folio, struct scan_control *sc)
+{
+	return gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
+		folio_migratetype(folio) == MIGRATE_CMA;
+}
+#else
+static bool unsuitable_folio(struct folio *folio, struct scan_control *sc)
+{
+	return false;
+}
+#endif
+
 /*
  * shrink_folio_list() returns the number of reclaimed pages
  */
@@ -1103,7 +1119,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 	struct folio_batch free_folios;
 	LIST_HEAD(ret_folios);
 	LIST_HEAD(demote_folios);
-	unsigned int nr_reclaimed = 0, nr_demoted = 0;
+	unsigned int nr_reclaimed = 0, nr_demoted = 0, nr_unsuitable = 0;
 	unsigned int pgactivate = 0;
 	bool do_demote_pass;
 	struct swap_iocb *plug = NULL;
@@ -1530,6 +1546,8 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 				 * leave it off the LRU).
 				 */
 				nr_reclaimed += nr_pages;
+				if (unsuitable_folio(folio, sc))
+					nr_unsuitable += nr_pages;
 				continue;
 			}
 		}
@@ -1560,6 +1578,8 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 		 * all pages in it.
 		 */
 		nr_reclaimed += nr_pages;
+		if (unsuitable_folio(folio, sc))
+			nr_unsuitable += nr_pages;
 
 		folio_unqueue_deferred_split(folio);
 		if (folio_batch_add(&free_folios, folio) == 0) {
@@ -1641,6 +1661,9 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 
 	if (plug)
 		swap_write_unplug(plug);
+
+	sc->nr_unsuitable += nr_unsuitable;
+
 	return nr_reclaimed;
 }
 
@@ -6431,6 +6454,10 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 	delayacct_freepages_end();
 
+	/* Almost all memory reclaimed was unsuitable? Move data into CMA. */
+	if (sc->nr_unsuitable >= sc->nr_reclaimed - 2)
+		balance_cma_zonelist(zonelist, SWAP_CLUSTER_MAX);
+
 	if (sc->nr_reclaimed)
 		return sc->nr_reclaimed;
 
@@ -7169,6 +7196,8 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
 	if (!sc.nr_reclaimed)
 		pgdat->kswapd_failures++;
 
+	if (sc.nr_unsuitable)
+		balance_node_cma(pgdat->node_id, NULL);
 out:
 	clear_reclaim_active(pgdat, highest_zoneidx);
 
-- 
2.47.3
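
For illustration, the two trigger conditions in the patch above condense to
the sketch below: kswapd kicks off CMA balancing whenever any of the pages it
reclaimed were unsuitable, while direct reclaim only does so when (almost) all
of them were. balance_cma_zonelist() and balance_node_cma() are introduced by
other patches in this series and are only assumed here; the helper is
illustrative, not part of the patch.

/*
 * Sketch only: how the two reclaim paths above decide to start CMA
 * balancing. Assumes balance_cma_zonelist()/balance_node_cma() from
 * elsewhere in this series.
 */
static void maybe_balance_cma(struct scan_control *sc,
			      struct zonelist *zonelist,
			      pg_data_t *pgdat, bool is_kswapd)
{
	if (is_kswapd) {
		/* Background work is cheap: any unsuitable pages qualify. */
		if (sc->nr_unsuitable)
			balance_node_cma(pgdat->node_id, NULL);
	} else {
		/* Direct reclaim: only when almost everything was unsuitable. */
		if (sc->nr_unsuitable >= sc->nr_reclaimed - 2)
			balance_cma_zonelist(zonelist, SWAP_CLUSTER_MAX);
	}
}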

From nobody Thu Oct 2 14:22:15 2025
Date: Mon, 15 Sep 2025 19:51:42 +0000
From: Frank van der Linden
To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org,
 linux-kernel@vger.kernel.org
Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev,
 Frank van der Linden
Subject: [RFC PATCH 01/12] mm/cma: add tunable for CMA fallback limit
Message-ID: <20250915195153.462039-2-fvdl@google.com>
In-Reply-To: <20250915195153.462039-1-fvdl@google.com>
References: <20250915195153.462039-1-fvdl@google.com>

Add a tunable to experiment with the circumstances under which movable
allocations should use CMA pageblocks first, to avoid false OOM conditions.

The limit is the percentage of free memory that consists of free CMA
pages. If the share of free memory sitting in CMA pageblocks is above
this limit, CMA is used first. So, 0 means always use CMA first, and
100 means never use CMA first.

The default is 50, which matches the existing behavior, so there is no
functional change.

Signed-off-by: Frank van der Linden
---
 include/linux/mm.h |  4 +++
 mm/page_alloc.c    | 84 ++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ae97a0b8ec7..313ab38dc398 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3253,6 +3253,10 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
 
 extern void setup_per_cpu_pageset(void);
 
+#ifdef CONFIG_CMA
+extern int cma_first_limit;
+#endif
+
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d1d037f97c5f..d3966d31c039 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2446,6 +2446,54 @@ enum rmqueue_mode {
 	RMQUEUE_STEAL,
 };
 
+#ifdef CONFIG_CMA
+/*
+ * The percentage of free CMA pages as part of the total number of free
+ * pages above which CMA is used first.
+ * 0 = always, 100 = never
+ */
+int cma_first_limit __read_mostly = 50;
+EXPORT_SYMBOL_GPL(cma_first_limit);
+
+/*
+ * Return values:
+ *
+ * -1 - never try CMA (!ALLOC_CMA or !IS_ENABLED(CONFIG_CMA))
+ *  0 - don't try CMA first
+ *  1 - try CMA first.
+ */
+static __always_inline int use_cma_first(struct zone *zone,
+					 unsigned int alloc_flags)
+{
+	unsigned long free_cma, free_pages, cma_percentage;
+
+	if (!(alloc_flags & ALLOC_CMA))
+		return -1;
+
+	free_cma = zone_page_state(zone, NR_FREE_CMA_PAGES);
+	if (!free_cma)
+		return -1;
+
+	if (!cma_first_limit)
+		return 1;
+
+	if (cma_first_limit == 100)
+		return 0;
+
+	free_pages = zone_page_state(zone, NR_FREE_PAGES);
+	if (!free_pages)
+		return 0;
+
+	cma_percentage = (free_cma * 100) / free_pages;
+	return (cma_percentage > cma_first_limit) ? 1 : 0;
+}
+#else
+static inline int use_cma_first(struct zone *zone, unsigned int alloc_flags)
+{
+	return -1;
+}
+#endif
+
 /*
  * Do the hard work of removing an element from the buddy allocator.
  * Call me with the zone->lock already held.
@@ -2455,20 +2503,13 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 					unsigned int alloc_flags, enum rmqueue_mode *mode)
 {
 	struct page *page;
+	int cma_first;
 
-	if (IS_ENABLED(CONFIG_CMA)) {
-		/*
-		 * Balance movable allocations between regular and CMA areas by
-		 * allocating from CMA when over half of the zone's free memory
-		 * is in the CMA area.
-		 */
-		if (alloc_flags & ALLOC_CMA &&
-		    zone_page_state(zone, NR_FREE_CMA_PAGES) >
-		    zone_page_state(zone, NR_FREE_PAGES) / 2) {
-			page = __rmqueue_cma_fallback(zone, order);
-			if (page)
-				return page;
-		}
+	cma_first = use_cma_first(zone, alloc_flags);
+	if (cma_first > 0) {
+		page = __rmqueue_cma_fallback(zone, order);
+		if (page)
+			return page;
 	}
 
 	/*
@@ -2487,7 +2528,11 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 			return page;
 		fallthrough;
 	case RMQUEUE_CMA:
-		if (alloc_flags & ALLOC_CMA) {
+		/*
+		 * Try CMA if we should, and haven't done so yet,
+		 * which is indicated by cma_first == 0.
+		 */
+		if (cma_first == 0) {
 			page = __rmqueue_cma_fallback(zone, order);
 			if (page) {
 				*mode = RMQUEUE_CMA;
@@ -6672,6 +6717,17 @@ static const struct ctl_table page_alloc_sysctl_table[] = {
 		.extra2		= SYSCTL_ONE_HUNDRED,
 	},
 #endif
+#ifdef CONFIG_CMA
+	{
+		.procname	= "cma_first_limit",
+		.data		= &cma_first_limit,
+		.maxlen		= sizeof(cma_first_limit),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE_HUNDRED,
+	},
+#endif
 };
 
 void __init page_alloc_sysctl_init(void)
-- 
2.51.0.384.g4c02a37b29-goog
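
As a worked example of the threshold arithmetic in use_cma_first() above
(hypothetical numbers, not taken from the patch): with 1 GiB of free memory in
a zone, of which 640 MiB sits in free CMA pageblocks, the free-CMA share is
62%, so with the default limit of 50 the allocator tries CMA first; a limit of
70 would make it take the normal fallback path. The tunable is registered in
page_alloc_sysctl_table, i.e. it should show up as a "vm." sysctl. The small
standalone C program below just replays that arithmetic:

#include <stdio.h>

/* Hypothetical numbers; userspace illustration of the use_cma_first() check. */
int main(void)
{
	unsigned long free_pages = 262144;	/* 1 GiB of free 4 KiB pages in the zone */
	unsigned long free_cma   = 163840;	/* 640 MiB of that is free CMA */
	int cma_first_limit = 50;		/* default from the patch */
	unsigned long cma_percentage = (free_cma * 100) / free_pages;	/* 62 */

	printf("free CMA share: %lu%% -> %s\n", cma_percentage,
	       cma_percentage > cma_first_limit ?
	       "try CMA first" : "use normal fallbacks first");
	return 0;
}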

From nobody Thu Oct 2 14:22:15 2025
Date: Mon, 15 Sep 2025 19:51:43 +0000
From: Frank van der Linden
To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org,
 linux-kernel@vger.kernel.org
Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev,
 Frank van der Linden
Subject: [RFC PATCH 02/12] mm/cma: clean up flag handling a bit
Message-ID: <20250915195153.462039-3-fvdl@google.com>
In-Reply-To: <20250915195153.462039-1-fvdl@google.com>
References: <20250915195153.462039-1-fvdl@google.com>

Atomic bit operations aren't needed for the cma flags field, so switch
their manipulation over to normal AND/OR operations. Also export the
bit values in linux/cma.h, as we will be adding publicly used values
later.

No functional change.

Signed-off-by: Frank van der Linden
Reviewed-by: Rik van Riel
---
 include/linux/cma.h | 12 ++++++++++++
 mm/cma.c            | 16 ++++++++--------
 mm/cma.h            |  7 -------
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 62d9c1cf6326..5c3fdc5da908 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -20,6 +20,18 @@
 #define CMA_MIN_ALIGNMENT_PAGES pageblock_nr_pages
 #define CMA_MIN_ALIGNMENT_BYTES (PAGE_SIZE * CMA_MIN_ALIGNMENT_PAGES)
 
+enum cma_flags {
+	__CMA_RESERVE_PAGES_ON_ERROR,
+	__CMA_ZONES_VALID,
+	__CMA_ZONES_INVALID,
+	__CMA_ACTIVATED,
+};
+
+#define CMA_RESERVE_PAGES_ON_ERROR	BIT(__CMA_RESERVE_PAGES_ON_ERROR)
+#define CMA_ZONES_VALID			BIT(__CMA_ZONES_VALID)
+#define CMA_ZONES_INVALID		BIT(__CMA_ZONES_INVALID)
+#define CMA_ACTIVATED			BIT(__CMA_ACTIVATED)
+
 struct cma;
 
 extern unsigned long totalcma_pages;
diff --git a/mm/cma.c b/mm/cma.c
index 2ffa4befb99a..549d85b2e3a3 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -111,8 +111,8 @@ bool cma_validate_zones(struct cma *cma)
 	 * check has already been done. If neither is set, the
 	 * check has not been performed yet.
 	 */
-	valid_bit_set = test_bit(CMA_ZONES_VALID, &cma->flags);
-	if (valid_bit_set || test_bit(CMA_ZONES_INVALID, &cma->flags))
+	valid_bit_set = (cma->flags & CMA_ZONES_VALID);
+	if (valid_bit_set || (cma->flags & CMA_ZONES_INVALID))
 		return valid_bit_set;
 
 	for (r = 0; r < cma->nranges; r++) {
@@ -126,12 +126,12 @@ bool cma_validate_zones(struct cma *cma)
 		 */
 		WARN_ON_ONCE(!pfn_valid(base_pfn));
 		if (pfn_range_intersects_zones(cma->nid, base_pfn, cmr->count)) {
-			set_bit(CMA_ZONES_INVALID, &cma->flags);
+			cma->flags |= CMA_ZONES_INVALID;
 			return false;
 		}
 	}
 
-	set_bit(CMA_ZONES_VALID, &cma->flags);
+	cma->flags |= CMA_ZONES_VALID;
 
 	return true;
 }
@@ -176,7 +176,7 @@ static void __init cma_activate_area(struct cma *cma)
 	INIT_HLIST_HEAD(&cma->mem_head);
 	spin_lock_init(&cma->mem_head_lock);
 #endif
-	set_bit(CMA_ACTIVATED, &cma->flags);
+	cma->flags |= CMA_ACTIVATED;
 
 	return;
 
@@ -185,7 +185,7 @@ static void __init cma_activate_area(struct cma *cma)
 		bitmap_free(cma->ranges[r].bitmap);
 
 	/* Expose all pages to the buddy, they are useless for CMA. */
-	if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) {
+	if (!(cma->flags & CMA_RESERVE_PAGES_ON_ERROR)) {
 		for (r = 0; r < allocrange; r++) {
 			cmr = &cma->ranges[r];
 			end_pfn = cmr->base_pfn + cmr->count;
@@ -211,7 +211,7 @@ core_initcall(cma_init_reserved_areas);
 
 void __init cma_reserve_pages_on_error(struct cma *cma)
 {
-	set_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags);
+	cma->flags |= CMA_RESERVE_PAGES_ON_ERROR;
 }
 
 static int __init cma_new_area(const char *name, phys_addr_t size,
@@ -1085,7 +1085,7 @@ void __init *cma_reserve_early(struct cma *cma, unsigned long size)
 	/*
 	 * Can only be called early in init.
 	 */
-	if (test_bit(CMA_ACTIVATED, &cma->flags))
+	if (cma->flags & CMA_ACTIVATED)
 		return NULL;
 
 	if (!IS_ALIGNED(size, CMA_MIN_ALIGNMENT_BYTES))
diff --git a/mm/cma.h b/mm/cma.h
index c70180c36559..25b696774c6a 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -64,13 +64,6 @@ struct cma {
 	int nid;
 };
 
-enum cma_flags {
-	CMA_RESERVE_PAGES_ON_ERROR,
-	CMA_ZONES_VALID,
-	CMA_ZONES_INVALID,
-	CMA_ACTIVATED,
-};
-
 extern struct cma cma_areas[MAX_CMA_AREAS];
 extern unsigned int cma_area_count;
 
-- 
2.51.0.384.g4c02a37b29-goog
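
The cleanup above works because cma->flags is only written while the areas are
set up, before any concurrent users exist, so plain read-modify-write is
sufficient. A minimal before/after illustration of the idiom (kernel-style
sketch, not part of the patch; it only assumes the CMA_* masks exported by
this patch):

/* Before: atomic bit helper on the flags word (bit number as argument). */
static void mark_activated_atomic(unsigned long *flags)
{
	set_bit(__CMA_ACTIVATED, flags);	/* atomic RMW, not needed here */
}

/* After: plain OR with the BIT() mask now exported from linux/cma.h. */
static void mark_activated_plain(unsigned long *flags)
{
	*flags |= CMA_ACTIVATED;		/* fine: only touched during init */
}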

From nobody Thu Oct 2 14:22:15 2025
Date: Mon, 15 Sep 2025 19:51:44 +0000
From: Frank van der Linden
To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org,
 linux-kernel@vger.kernel.org
Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev,
 Frank van der Linden
Subject: [RFC PATCH 03/12] mm/cma: add flags argument to init functions
Message-ID: <20250915195153.462039-4-fvdl@google.com>
In-Reply-To: <20250915195153.462039-1-fvdl@google.com>
References: <20250915195153.462039-1-fvdl@google.com>

Add a flags argument to the various CMA init functions, as there will
be a need to pass in more flags to control init and runtime behavior
other than just the current 'fixed' argument.

Replace the fixed argument with a flags argument, and adapt callers.

Signed-off-by: Frank van der Linden
Acked-by: Rik van Riel
---
 arch/powerpc/kernel/fadump.c         |  2 +-
 arch/powerpc/kvm/book3s_hv_builtin.c |  2 +-
 drivers/s390/char/vmcp.c             |  2 +-
 include/linux/cma.h                  | 20 ++++++++++------
 kernel/dma/contiguous.c              | 10 ++++----
 mm/cma.c                             | 36 ++++++++++++++++++----------
 mm/hugetlb_cma.c                     |  2 +-
 7 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 5782e743fd27..a763419bd1bc 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -112,7 +112,7 @@ void __init fadump_cma_init(void)
 		return;
 	}
 
-	rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
+	rc = cma_init_reserved_mem(base, size, 0, 0, "fadump_cma", &fadump_cma);
 	if (rc) {
 		pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
 		/*
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fa0e3a22cac0..23dcb67e797a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -95,7 +95,7 @@ void __init kvm_cma_reserve(void)
 			 (unsigned long)selected_size / SZ_1M);
 		align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
 		cma_declare_contiguous(0, selected_size, 0, align_size,
-			KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, "kvm_cma",
+			KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, 0, "kvm_cma",
 			&kvm_cma);
 	}
 }
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 69899bb86b3e..cd0c0edc496b 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -54,7 +54,7 @@ void __init vmcp_cma_reserve(void)
 {
 	if (!machine_is_vm())
 		return;
-	cma_declare_contiguous(0, vmcp_cma_size, 0, 0, 0, false, "vmcp", &vmcp_cma);
+	cma_declare_contiguous(0, vmcp_cma_size, 0, 0, 0, 0, "vmcp", &vmcp_cma);
 }
 
 static void vmcp_response_alloc(struct vmcp_session *session)
diff --git a/include/linux/cma.h b/include/linux/cma.h
index 5c3fdc5da908..ec48f2a11f1d 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -25,12 +25,16 @@ enum cma_flags {
 	__CMA_ZONES_VALID,
 	__CMA_ZONES_INVALID,
 	__CMA_ACTIVATED,
+	__CMA_FIXED,
 };
 
 #define CMA_RESERVE_PAGES_ON_ERROR	BIT(__CMA_RESERVE_PAGES_ON_ERROR)
 #define CMA_ZONES_VALID			BIT(__CMA_ZONES_VALID)
 #define CMA_ZONES_INVALID		BIT(__CMA_ZONES_INVALID)
 #define CMA_ACTIVATED			BIT(__CMA_ACTIVATED)
+#define CMA_FIXED			BIT(__CMA_FIXED)
+
+#define CMA_INIT_FLAGS	(CMA_FIXED|CMA_RESERVE_PAGES_ON_ERROR)
 
 struct cma;
 
@@ -42,23 +46,25 @@ extern const char *cma_get_name(const struct cma *cma);
 extern int __init cma_declare_contiguous_nid(phys_addr_t base,
 			phys_addr_t size, phys_addr_t limit,
 			phys_addr_t alignment, unsigned int order_per_bit,
-			bool fixed, const char *name, struct cma **res_cma,
-			int nid);
+			unsigned long flags, const char *name,
+			struct cma **res_cma, int nid);
 static inline int __init cma_declare_contiguous(phys_addr_t base,
 			phys_addr_t size, phys_addr_t limit,
 			phys_addr_t alignment, unsigned int order_per_bit,
-			bool fixed, const char *name, struct cma **res_cma)
+			unsigned long flags, const char *name,
+			struct cma **res_cma)
 {
 	return cma_declare_contiguous_nid(base, size, limit, alignment,
-			order_per_bit, fixed, name, res_cma, NUMA_NO_NODE);
+			order_per_bit, flags, name, res_cma, NUMA_NO_NODE);
 }
 extern int __init cma_declare_contiguous_multi(phys_addr_t size,
 			phys_addr_t align, unsigned int order_per_bit,
-			const char *name, struct cma **res_cma, int nid);
+			unsigned long flags, const char *name,
+			struct cma **res_cma, int nid);
 extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					unsigned int order_per_bit,
-					const char *name,
-					struct cma **res_cma);
+					unsigned long flags,
+					const char *name, struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
 			      bool no_warn);
 extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index d9b9dcba6ff7..7f2eed3b7cc5 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -175,7 +175,7 @@ static void __init dma_numa_cma_reserve(void)
 			cma = &dma_contiguous_pernuma_area[nid];
 			snprintf(name, sizeof(name), "pernuma%d", nid);
 			ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0,
-							 0, false, name, cma, nid);
+							 0, 0, name, cma, nid);
 			if (ret)
 				pr_warn("%s: reservation failed: err %d, node %d", __func__,
 					ret, nid);
@@ -185,7 +185,7 @@ static void __init dma_numa_cma_reserve(void)
 
 		cma = &dma_contiguous_numa_area[nid];
 		snprintf(name, sizeof(name), "numa%d", nid);
-		ret = cma_declare_contiguous_nid(0, numa_cma_size[nid], 0, 0, 0, false,
+		ret = cma_declare_contiguous_nid(0, numa_cma_size[nid], 0, 0, 0, 0,
 						 name, cma, nid);
 		if (ret)
 			pr_warn("%s: reservation failed: err %d, node %d", __func__,
@@ -279,7 +279,8 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
 {
 	int ret;
 
-	ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed,
+	ret = cma_declare_contiguous(base, size, limit, 0, 0,
+				     fixed ? CMA_FIXED : 0,
 				     "reserved", res_cma);
 	if (ret)
 		return ret;
@@ -478,7 +479,8 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
 		return -EINVAL;
 	}
 
-	err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
+	err = cma_init_reserved_mem(rmem->base, rmem->size, 0, 0, rmem->name,
+				    &cma);
 	if (err) {
 		pr_err("Reserved memory: unable to setup CMA region\n");
 		return err;
diff --git a/mm/cma.c b/mm/cma.c
index 549d85b2e3a3..00d8d365f0b5 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -35,6 +35,7 @@
 
 struct cma cma_areas[MAX_CMA_AREAS];
 unsigned int cma_area_count;
+static DEFINE_MUTEX(cma_mutex);
 
 phys_addr_t cma_get_base(const struct cma *cma)
 {
@@ -215,7 +216,7 @@ void __init cma_reserve_pages_on_error(struct cma *cma)
 }
 
 static int __init cma_new_area(const char *name, phys_addr_t size,
-			       unsigned int order_per_bit,
+			       unsigned int order_per_bit, unsigned long flags,
 			       struct cma **res_cma)
 {
 	struct cma *cma;
@@ -239,6 +240,7 @@ static int __init cma_new_area(const char *name, phys_addr_t size,
 
 	cma->available_count = cma->count = size >> PAGE_SHIFT;
 	cma->order_per_bit = order_per_bit;
+	cma->flags = flags;
 	*res_cma = cma;
 	totalcma_pages += cma->count;
 
@@ -265,7 +267,7 @@ static void __init cma_drop_area(struct cma *cma)
  */
 int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 				 unsigned int order_per_bit,
-				 const char *name,
+				 unsigned long flags, const char *name,
 				 struct cma **res_cma)
 {
 	struct cma *cma;
@@ -288,7 +290,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 	if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES))
 		return -EINVAL;
 
-	ret = cma_new_area(name, size, order_per_bit, &cma);
+	ret = cma_new_area(name, size, order_per_bit, flags, &cma);
 	if (ret != 0)
 		return ret;
 
@@ -429,12 +431,18 @@ static phys_addr_t __init cma_alloc_mem(phys_addr_t base, phys_addr_t size,
 static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
 			phys_addr_t size, phys_addr_t limit,
 			phys_addr_t alignment, unsigned int order_per_bit,
-			bool fixed, const char *name, struct cma **res_cma,
-			int nid)
+			unsigned long flags, const char *name,
+			struct cma **res_cma, int nid)
 {
 	phys_addr_t memblock_end = memblock_end_of_DRAM();
 	phys_addr_t base = *basep;
 	int ret;
+	bool fixed;
+
+	if (flags & ~CMA_INIT_FLAGS)
+		return -EINVAL;
+
+	fixed = (flags & CMA_FIXED);
 
 	pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n",
 		__func__, &size, &base, &limit, &alignment);
@@ -503,7 +511,8 @@ static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
 		kmemleak_ignore_phys(base);
 	}
 
-	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma);
+	ret = cma_init_reserved_mem(base, size, order_per_bit, flags,
+				    name, res_cma);
 	if (ret) {
 		memblock_phys_free(base, size);
 		return ret;
@@ -526,7 +535,8 @@ static int __init __cma_declare_contiguous_nid(phys_addr_t *basep,
  */
 int __init cma_declare_contiguous_multi(phys_addr_t total_size,
 			phys_addr_t align, unsigned int order_per_bit,
-			const char *name, struct cma **res_cma, int nid)
+			unsigned long flags, const char *name,
+			struct cma **res_cma, int nid)
 {
 	phys_addr_t start = 0, end;
 	phys_addr_t size, sizesum, sizeleft;
@@ -543,7 +553,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size,
 	 * First, try it the normal way, producing just one range.
 	 */
 	ret = __cma_declare_contiguous_nid(&start, total_size, 0, align,
-			order_per_bit, false, name, res_cma, nid);
+			order_per_bit, flags, name, res_cma, nid);
 	if (ret != -ENOMEM)
 		goto out;
 
@@ -567,7 +577,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size,
 	sizesum = 0;
 	failed = NULL;
 
-	ret = cma_new_area(name, total_size, order_per_bit, &cma);
+	ret = cma_new_area(name, total_size, order_per_bit, flags, &cma);
 	if (ret != 0)
 		goto out;
 
@@ -716,7 +726,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size,
 * @limit: End address of the reserved memory (optional, 0 for any).
 * @alignment: Alignment for the CMA area, should be power of 2 or zero
 * @order_per_bit: Order of pages represented by one bit on bitmap.
- * @fixed: hint about where to place the reserved area
+ * @flags: flags controlling various aspects of the area
 * @name: The name of the area. See function cma_init_reserved_mem()
 * @res_cma: Pointer to store the created cma region.
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
@@ -732,13 +742,13 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size,
 int __init cma_declare_contiguous_nid(phys_addr_t base,
 			phys_addr_t size, phys_addr_t limit,
 			phys_addr_t alignment, unsigned int order_per_bit,
-			bool fixed, const char *name, struct cma **res_cma,
-			int nid)
+			unsigned long flags, const char *name,
+			struct cma **res_cma, int nid)
 {
 	int ret;
 
 	ret = __cma_declare_contiguous_nid(&base, size, limit, alignment,
-			order_per_bit, fixed, name, res_cma, nid);
+			order_per_bit, flags, name, res_cma, nid);
 	if (ret != 0)
 		pr_err("Failed to reserve %ld MiB\n",
 		       (unsigned long)size / SZ_1M);
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index f58ef4969e7a..71d0e9a048d4 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -221,7 +221,7 @@ void __init hugetlb_cma_reserve(int order)
 		 * huge page demotion.
 		 */
 		res = cma_declare_contiguous_multi(size, PAGE_SIZE << order,
-					HUGETLB_PAGE_ORDER, name,
+					HUGETLB_PAGE_ORDER, 0, name,
 					&hugetlb_cma[nid], nid);
 		if (res) {
 			pr_warn("hugetlb_cma: reservation failed: err %d, node %d",
-- 
2.51.0.384.g4c02a37b29-goog
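
For callers converting to the new prototypes in the patch above, the change is
mechanical: the boolean 'fixed' argument becomes a flags word, with CMA_FIXED
carrying the old meaning (as the dma_contiguous_reserve_area() hunk shows with
its "fixed ? CMA_FIXED : 0"). A hypothetical caller, with invented names and
shown purely for illustration, would change roughly as follows:

#include <linux/cma.h>

static struct cma *mydev_cma;	/* hypothetical area, illustration only */

static int __init mydev_cma_reserve(phys_addr_t base, phys_addr_t size)
{
	/*
	 * Old calling convention (before this patch):
	 *   cma_declare_contiguous(base, size, 0, 0, 0, true,
	 *			    "mydev", &mydev_cma);
	 *
	 * New calling convention: the bool becomes a flags word.
	 */
	return cma_declare_contiguous(base, size, 0, 0, 0, CMA_FIXED,
				      "mydev", &mydev_cma);
}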

From nobody Thu Oct 2 14:22:15 2025
Date: Mon, 15 Sep 2025 19:51:45 +0000
From: Frank van der Linden
To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org,
 linux-kernel@vger.kernel.org
Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev,
 Frank van der Linden
Subject: [RFC PATCH 04/12] mm/cma: keep a global sorted list of CMA ranges
Message-ID: <20250915195153.462039-5-fvdl@google.com>
In-Reply-To: <20250915195153.462039-1-fvdl@google.com>
References: <20250915195153.462039-1-fvdl@google.com>

In order to walk through CMA areas efficiently, it is useful to keep a
global sorted list of ranges. Create this list when activating the
areas.

Since users of this list may want to reference the CMA area the range
came from, there needs to be a link from the range to that area. So,
store a pointer to the CMA structure in the cma_memrange structure.
This also reduces the number of arguments to a few internal functions.

Signed-off-by: Frank van der Linden
---
 mm/cma.c | 72 ++++++++++++++++++++++++++++++++++++++++----------------
 mm/cma.h |  6 ++---
 2 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/mm/cma.c b/mm/cma.c
index 00d8d365f0b5..1f5a7bfc9152 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -65,12 +66,11 @@ static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
 * Find the offset of the base PFN from the specified align_order.
 * The value returned is represented in order_per_bits.
 */
-static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
-					       const struct cma_memrange *cmr,
+static unsigned long cma_bitmap_aligned_offset(const struct cma_memrange *cmr,
 					       unsigned int align_order)
 {
 	return (cmr->base_pfn & ((1UL << align_order) - 1))
-		>> cma->order_per_bit;
+		>> cmr->cma->order_per_bit;
 }
 
 static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
@@ -79,11 +79,12 @@ static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
 	return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;
 }
 
-static void cma_clear_bitmap(struct cma *cma, const struct cma_memrange *cmr,
+static void cma_clear_bitmap(const struct cma_memrange *cmr,
 			     unsigned long pfn, unsigned long count)
 {
 	unsigned long bitmap_no, bitmap_count;
 	unsigned long flags;
+	struct cma *cma = cmr->cma;
 
 	bitmap_no = (pfn - cmr->base_pfn) >> cma->order_per_bit;
 	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
@@ -147,8 +148,7 @@ static void __init cma_activate_area(struct cma *cma)
 	for (allocrange = 0; allocrange < cma->nranges; allocrange++) {
 		cmr = &cma->ranges[allocrange];
 		early_pfn[allocrange] = cmr->early_pfn;
-		cmr->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma, cmr),
-					    GFP_KERNEL);
+		cmr->bitmap = bitmap_zalloc(cma_bitmap_maxno(cmr), GFP_KERNEL);
 		if (!cmr->bitmap)
 			goto cleanup;
 	}
@@ -199,12 +199,45 @@ static void __init cma_activate_area(struct cma *cma)
 		pr_err("CMA area %s could not be activated\n", cma->name);
 }
 
+static struct cma_memrange **cma_ranges;
+static int cma_nranges;
+
+static int cmprange(const void *a, const void *b)
+{
+	struct cma_memrange *r1, *r2;
+
+	r1 = *(struct cma_memrange **)a;
+	r2 = *(struct cma_memrange **)b;
+
+	if (r1->base_pfn < r2->base_pfn)
+		return -1;
+	return r1->base_pfn - r2->base_pfn;
+}
+
 static int __init cma_init_reserved_areas(void)
 {
-	int i;
+	int i, r, nranges;
+	struct cma *cma;
+	struct cma_memrange *cmr;
+
+	nranges = 0;
+	for (i = 0; i < cma_area_count; i++) {
+		cma = &cma_areas[i];
+		nranges += cma->nranges;
+		cma_activate_area(cma);
+	}
+
+	cma_ranges = kcalloc(nranges, sizeof(*cma_ranges), GFP_KERNEL);
+	cma_nranges = 0;
+	for (i = 0; i < cma_area_count; i++) {
+		cma = &cma_areas[i];
+		for (r = 0; r < cma->nranges; r++) {
+			cmr = &cma->ranges[r];
+			cma_ranges[cma_nranges++] = cmr;
+		}
+	}
 
-	for (i = 0; i < cma_area_count; i++)
-		cma_activate_area(&cma_areas[i]);
+	sort(cma_ranges, cma_nranges, sizeof(*cma_ranges), cmprange, NULL);
 
 	return 0;
 }
@@ -297,6 +330,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 	cma->ranges[0].base_pfn = PFN_DOWN(base);
 	cma->ranges[0].early_pfn = PFN_DOWN(base);
 	cma->ranges[0].count = cma->count;
+	cma->ranges[0].cma = cma;
 	cma->nranges = 1;
 	cma->nid = NUMA_NO_NODE;
 
@@ -687,6 +721,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size,
 		cmrp->base_pfn = PHYS_PFN(mlp->base);
 		cmrp->early_pfn = cmrp->base_pfn;
 		cmrp->count = size >> PAGE_SHIFT;
+		cmrp->cma = cma;
 
 		sizeleft -= size;
 		if (sizeleft == 0)
@@ -772,7 +807,7 @@ static void cma_debug_show_areas(struct cma *cma)
 	for (r = 0; r < cma->nranges; r++) {
 		cmr = &cma->ranges[r];
 
-		nbits = cma_bitmap_maxno(cma, cmr);
+		nbits = cma_bitmap_maxno(cmr);
 
 		pr_info("range %d: ", r);
 		for_each_clear_bitrange(start, end, cmr->bitmap, nbits) {
@@ -786,9 +821,9 @@ static void cma_debug_show_areas(struct cma *cma)
 	spin_unlock_irq(&cma->lock);
 }
 
-static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
-			   unsigned long count, unsigned int align,
-			   struct page **pagep, gfp_t gfp)
+static int cma_range_alloc(struct cma_memrange *cmr,
+			   unsigned long count, unsigned int align,
+			   struct page **pagep, gfp_t gfp)
 {
 	unsigned long mask, offset;
 	unsigned long pfn = -1;
@@ -796,10 +831,11 @@ static int cma_range_alloc(struct cma_memrange *cmr,
 	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
 	int ret = -EBUSY;
 	struct page *page = NULL;
+	struct cma *cma = cmr->cma;
 
 	mask = cma_bitmap_aligned_mask(cma, align);
-	offset = cma_bitmap_aligned_offset(cma, cmr, align);
-	bitmap_maxno = cma_bitmap_maxno(cma, cmr);
+	offset = cma_bitmap_aligned_offset(cmr, align);
+	bitmap_maxno = cma_bitmap_maxno(cmr);
 	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
 
 	if (bitmap_count > bitmap_maxno)
@@ -840,7 +876,7 @@ static int cma_range_alloc(struct cma_memrange *cmr,
 			break;
 		}
 
-		cma_clear_bitmap(cma, cmr, pfn, count);
+		cma_clear_bitmap(cmr, pfn, count);
 		if (ret != -EBUSY)
 			break;
 
@@ -879,7 +915,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
 	for (r = 0; r < cma->nranges; r++) {
 		page = NULL;
 
-		ret = cma_range_alloc(cma, &cma->ranges[r], count, align,
+		ret = cma_range_alloc(&cma->ranges[r], count, align,
 				      &page, gfp);
 		if (ret != -EBUSY || page)
 			break;
@@ -1011,7 +1047,7 @@ bool cma_release(struct cma *cma, const struct page *pages,
 		return false;
 
 	free_contig_range(pfn, count);
-	cma_clear_bitmap(cma, cmr, pfn, count);
+	cma_clear_bitmap(cmr, pfn, count);
 	cma_sysfs_account_release_pages(cma, count);
 	trace_cma_release(cma->name, pfn, pages, count);
 
diff --git a/mm/cma.h b/mm/cma.h
index 25b696774c6a..384d1109d438 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -30,6 +30,7 @@ struct cma_memrange {
 		unsigned long early_pfn;
 		unsigned long *bitmap;
 	};
+	struct cma *cma;
 #ifdef CONFIG_CMA_DEBUGFS
 	struct debugfs_u32_array dfs_bitmap;
 #endif
@@ -67,10 +68,9 @@ struct cma {
 extern struct cma cma_areas[MAX_CMA_AREAS];
 extern unsigned int cma_area_count;
 
-static inline unsigned long cma_bitmap_maxno(struct cma *cma,
-					     struct cma_memrange *cmr)
+static inline unsigned long cma_bitmap_maxno(struct cma_memrange *cmr)
 {
-	return cmr->count >> cma->order_per_bit;
+	return cmr->count >> cmr->cma->order_per_bit;
 }
 
 #ifdef CONFIG_CMA_SYSFS
-- 
2.51.0.384.g4c02a37b29-goog
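
Because cma_ranges[] is sorted by base_pfn at activation time, later consumers
can walk every CMA range in physical address order without visiting each cma
area separately, and the new cmr->cma back pointer identifies the owning area.
A minimal sketch of such a walk is shown below; it is illustrative only and
assumes it lives in mm/cma.c, where the file-local cma_ranges[]/cma_nranges
built above are visible.

/*
 * Sketch only, not part of the patch: print every CMA range in
 * ascending PFN order together with its owning area.
 */
static void cma_dump_sorted_ranges(void)
{
	int i;

	for (i = 0; i < cma_nranges; i++) {
		struct cma_memrange *cmr = cma_ranges[i];

		pr_info("cma %s: range %d: pfn [%lu, %lu)\n",
			cma_get_name(cmr->cma), i,
			cmr->base_pfn, cmr->base_pfn + cmr->count);
	}
}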

From nobody Thu Oct 2 14:22:15 2025
Date: Mon, 15 Sep 2025 19:51:46 +0000
From: Frank van der Linden
To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org,
 linux-kernel@vger.kernel.org
Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev,
 Frank van der Linden
Subject: [RFC PATCH 05/12] mm/cma: add helper functions for CMA balancing
Message-ID: <20250915195153.462039-6-fvdl@google.com>
In-Reply-To: <20250915195153.462039-1-fvdl@google.com>
References: <20250915195153.462039-1-fvdl@google.com>

Add some CMA helper functions to assist CMA balancing. They are:

cma_get_available
  - Returns the number of available pages in a CMA area.

cma_numranges
  - Returns the total number of CMA ranges.

cma_next_balance_pagerange
  - Get the next CMA page range in a zone that is available as a target
    for CMA balancing. This means a range that consists of CMA pageblocks
    that are managed by the buddy allocator (not allocated through
    cma_alloc). The array of CMA ranges is walked top down.

cma_next_noncma_pagerange
  - Get the next non-CMA page range in a zone. The zone is traversed
    bottom up.

Signed-off-by: Frank van der Linden
---
 include/linux/cma.h |  30 ++++++++
 mm/cma.c            | 161 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 191 insertions(+)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index ec48f2a11f1d..0504580d61d0 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -37,6 +37,7 @@ enum cma_flags {
 #define CMA_INIT_FLAGS (CMA_FIXED|CMA_RESERVE_PAGES_ON_ERROR)
 
 struct cma;
+struct zone;
 
 extern unsigned long totalcma_pages;
 extern phys_addr_t cma_get_base(const struct cma *cma);
@@ -79,6 +80,12 @@ extern void cma_reserve_pages_on_error(struct cma *cma);
 struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp);
 bool cma_free_folio(struct cma *cma, const struct folio *folio);
 bool cma_validate_zones(struct cma *cma);
+int cma_numranges(void);
+unsigned long cma_get_available(const struct cma *cma);
+bool cma_next_balance_pagerange(struct zone *zone, struct cma *cma, int *rindex,
+		unsigned long *startpfn, unsigned long *endpfn);
+bool cma_next_noncma_pagerange(struct zone *zone, int *rindex,
+		unsigned long *startpfn, unsigned long *endpfn);
 #else
 static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
 {
@@ -93,6 +100,29 @@ static inline bool cma_validate_zones(struct cma *cma)
 {
 	return false;
 }
+
+static inline int cma_numranges(void)
+{
+	return 0;
+}
+
+static inline unsigned long cma_get_available(const struct cma *cma)
+{
+	return 0;
+}
+
+static inline bool cma_next_balance_pagerange(struct zone *zone,
+		struct cma *cma, int *rindex, unsigned long *start_pfn,
+		unsigned long *end_pfn)
+{
+	return false;
+}
+
+static inline bool cma_next_noncma_pagerange(struct zone *zone, int *rindex,
+		unsigned long *start_pfn, unsigned long *end_pfn)
+{
+	return false;
+}
 #endif
 
 #endif
diff --git a/mm/cma.c b/mm/cma.c
index 1f5a7bfc9152..53cb1833407b 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -54,6 +54,11 @@ const char *cma_get_name(const struct cma *cma)
 	return cma->name;
 }
 
+unsigned long cma_get_available(const struct cma *cma)
+{
+	return cma->available_count;
+}
+
 static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
 					     unsigned int align_order)
 {
@@ -202,6 +207,11 @@ static void __init cma_activate_area(struct cma *cma)
 static struct cma_memrange **cma_ranges;
 static int cma_nranges;
 
+int cma_numranges(void)
+{
+	return cma_nranges;
+}
+
 static int cmprange(const void *a, const void *b)
 {
 	struct cma_memrange *r1, *r2;
@@ -214,6 +224,157 @@ static int cmprange(const void *a, const void *b)
 	return r1->base_pfn - r2->base_pfn;
 }
 
+/*
+ * Provide the next free range in a cma memory range, as derived
+ * from the bitmap.
+ *
+ * @cmr: memory range to scan
+ * @start_pfn: the beginning of the previous range
+ * @end_pfn: the end of the previous range, zero for the first call
+ *
+ * The caller can adjust *end_pfn and use it as a starting point.
+ */ +static bool cma_next_free_range(struct cma_memrange *cmr, + unsigned long *start_pfn, unsigned long *end_pfn) +{ + unsigned long zerobit, onebit, start, nbits, offset, base; + struct cma *cma =3D cmr->cma; + + nbits =3D cma_bitmap_maxno(cmr); + + if (!*end_pfn) + offset =3D start =3D 0; + else { + start =3D ((*end_pfn - cmr->base_pfn) >> cma->order_per_bit); + if (start >=3D nbits) + return false; + + offset =3D *end_pfn - + (cmr->base_pfn + (start << cma->order_per_bit)); + } + + spin_lock_irq(&cma->lock); + zerobit =3D find_next_zero_bit(cmr->bitmap, nbits, start); + if (zerobit >=3D nbits) { + spin_unlock_irq(&cma->lock); + return false; + } + onebit =3D find_next_bit(cmr->bitmap, nbits, zerobit); + spin_unlock_irq(&cma->lock); + + base =3D (zerobit << cma->order_per_bit) + cmr->base_pfn; + *start_pfn =3D base + offset; + *end_pfn =3D base + ((onebit - zerobit) << cma->order_per_bit); + + return true; +} + +static inline bool cma_should_balance_range(struct zone *zone, + struct cma_memrange *cmr) +{ + if (page_zone(pfn_to_page(cmr->base_pfn)) !=3D zone) + return false; + + return true; +} + +/* + * Get the next CMA page range containing pages that have not been + * allocated through cma_alloc. This is just a snapshot, and the caller + * is expected to deal with the changing circumstances. Used to walk + * through CMA pageblocks in a zone in an optimized fashion during + * zone CMA balance compaction. + * + * If @cma is NULL, the global list of ranges is walked, else + * the ranges of the area pointed to by @cma are walked. + */ +bool cma_next_balance_pagerange(struct zone *zone, struct cma *cma, + int *rindex, unsigned long *start_pfn, + unsigned long *end_pfn) +{ + struct cma_memrange *cmr; + int i, nranges; + + if (!cma_nranges) + return false; + + nranges =3D cma ? cma->nranges : cma_nranges; + + if (*rindex =3D=3D -1) { + if (*end_pfn !=3D 0) { + for (i =3D nranges - 1; i >=3D 0; i--) { + cmr =3D cma ? &cma->ranges[i] : cma_ranges[i]; + if (!cma_should_balance_range(zone, cmr)) + continue; + if (*end_pfn > cmr->base_pfn && + *end_pfn < (cmr->base_pfn + cmr->count)) + break; + } + } else { + i =3D nranges - 1; + } + } else { + i =3D *rindex; + } + + for (; i >=3D 0; i--) { + cmr =3D cma ? &cma->ranges[i] : cma_ranges[i]; + if (!cma_should_balance_range(zone, cmr)) + continue; + if (cma_next_free_range(cmr, start_pfn, end_pfn)) { + *rindex =3D i; + return true; + } + } + + return false; +} + +/* + * Get the next stretch of memory in a zone that is not MIGRATE_CMA + * pageblocks. 
+ */ +bool cma_next_noncma_pagerange(struct zone *zone, int *rindex, + unsigned long *start_pfn, + unsigned long *end_pfn) +{ + struct cma_memrange *cmr; + unsigned long cma_start, cma_end; + int i; + + if (*end_pfn >=3D zone_end_pfn(zone)) + return false; + + if (*rindex =3D=3D -1) { + *rindex =3D 0; + if (*start_pfn =3D=3D 0) + *start_pfn =3D zone->zone_start_pfn; + } else { + cmr =3D cma_ranges[*rindex]; + *start_pfn =3D cmr->base_pfn + cmr->count; + } + + for (i =3D *rindex; i < cma_nranges; i++) { + cmr =3D cma_ranges[i]; + cma_start =3D cmr->base_pfn; + cma_end =3D cmr->base_pfn + cmr->count; + if (page_zone(pfn_to_page(cma_start)) !=3D zone) + continue; + if (*start_pfn =3D=3D cma_start) { + *start_pfn =3D cma_end; + } else if (*start_pfn < cma_start) { + *rindex =3D i; + *end_pfn =3D cma_start; + return true; + } + } + + *rindex =3D cma_nranges; + *end_pfn =3D zone_end_pfn(zone); + + return true; +} + static int __init cma_init_reserved_areas(void) { int i, r, nranges; --=20 2.51.0.384.g4c02a37b29-goog From nobody Thu Oct 2 14:22:15 2025 Received: from mail-pf1-f201.google.com (mail-pf1-f201.google.com [209.85.210.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3D77C28727E for ; Mon, 15 Sep 2025 19:52:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965949; cv=none; b=IG4wvhg0NkxFwdQgcH/fElotdImCIyukc0kNlmIdIV78Pf+597CJ8g/B+4OOias2P3E4KHJE1wna+S/vTUfnA/NKGg05M7H4EtBw2TfBPSPKIzoLt6Oa2UB8n8LjGUnSjDnvuoaKfir8wSTCmxMrIJxpiL7WZKQPOkmUCns/Uj0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965949; c=relaxed/simple; bh=2cRPoohQ0MmOLZTf1mIp9HVeBlRdoTXyPcT7a1QwRos=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Cc:Content-Type; b=NDWqic7mmWJD8AEAPReiE4hC8scafItqrhk6pvu0MA8pgS9jqkTkhN+5uo9CGj1QTYL/GOb/hFpwZC55nBet6RHmVQAuo/NWuugXL3BQYZruiR5MFlYAw3x1ycZyqOhmgZihSXjb+C96cBA/kl2TiGuEZ66avyOy086D29uvG5E= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=aoHn/lci; arc=none smtp.client-ip=209.85.210.201 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="aoHn/lci" Received: by mail-pf1-f201.google.com with SMTP id d2e1a72fcca58-76e2eb787f2so4210206b3a.3 for ; Mon, 15 Sep 2025 12:52:27 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1757965947; x=1758570747; darn=vger.kernel.org; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=P8sdVCwgpzgSZpEZdIa2RxiH6JcLxFU2dfzzWpV7l6M=; b=aoHn/lciK1E0IpL6HkpUhkZXpQSn4JIJZgCuLGh8neIDD29SJpEScJf0Fqb5DYNsgP TFcVKH8WZsMLScRK8XyGvfSWPeuLRlJBQPE+heGSeii9xPF4RW4VqTxzoCGOFYWQuYbK gy0sXt2fNuXeHdXfDDTu3/gBiTALu+MGPrt43KABtPbhojYdhZRXLd727lv2mhi5xTno t2QzYTPJwWCmgxU3mDBnbZ4aNaF22KjojLjOcIZS5Lph28pA8MUIU4oIECnv3NA6jfu6 4J4cYX9dieIz8Df0kz1zGxTVpmt+1JeaRVQrtw4o/gerL9AnuHs8fVkJQGxU29E9jVpw 
y4vw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1757965947; x=1758570747; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=P8sdVCwgpzgSZpEZdIa2RxiH6JcLxFU2dfzzWpV7l6M=; b=LnSl0z1ZZfqSjQNceYoddu+5THb4W8TNw4jOQwOTjf4xEQznMDyZsEJYDZ4goiuiPF CSlmiOtmIkzJ7xEMKX6dAlZe9hLhH0SGbnLXSmhY0lDws3rCoOCdkbgCrIFvCN31Os0R F/I43ZKWkJ94hG6rW57EEoYADVz7KyLzC2V1aTrdYcZabLujYIPC5qa4SpM+k7UaR+Km M6GxukPgMPVl8+H0bWNSAKGDCOv/TjZ3L534USqxrISKF+hD/fWF4q6uFb1UjcIOkc56 sVyYvIq/JGAwS/j6aknW7JMVCk5hmYZsPZ31JMo2zUbs4oiIH4q57VslH0B1ETU4kFCM DO2g== X-Forwarded-Encrypted: i=1; AJvYcCU0J7ekzMM1Mdct/BXS10GMa493MgOuh8zEbZU+KpHQw80+56pDVSFeTOGcdNNBANIRURXJm0tFD7ka8bs=@vger.kernel.org X-Gm-Message-State: AOJu0YyCs4cVOWgW87TDvj+OexyZGOimPYbb+u2vYhLVBfmWmaYZpTa/ wHHpopzmqcJSZXBZhV0DoXKfFP52sH2zYMT8YHIJw9gLmhfC8dZHIXxtQQ58120utnW7Kxnj+A= = X-Google-Smtp-Source: AGHT+IHSrj8b6/0anPmsTtmAtpPpKBamD1vvncwaV7EMPgRBfcfV+2TtFZIJ4M8kZJb/oeTBgqjLyGgI X-Received: from pfaq4.prod.google.com ([2002:a05:6a00:a884:b0:772:32b1:58f9]) (user=fvdl job=prod-delivery.src-stubby-dispatcher) by 2002:a05:6a00:17a1:b0:772:8694:1d71 with SMTP id d2e1a72fcca58-77611f99ea8mr14148444b3a.0.1757965947238; Mon, 15 Sep 2025 12:52:27 -0700 (PDT) Date: Mon, 15 Sep 2025 19:51:47 +0000 In-Reply-To: <20250915195153.462039-1-fvdl@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250915195153.462039-1-fvdl@google.com> X-Mailer: git-send-email 2.51.0.384.g4c02a37b29-goog Message-ID: <20250915195153.462039-7-fvdl@google.com> Subject: [RFC PATCH 06/12] mm/cma: define and act on CMA_BALANCE flag From: Frank van der Linden To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org, linux-kernel@vger.kernel.org Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev, Frank van der Linden Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When the CMA_BALANCE flag is set for a CMA area, it means that it opts in to CMA balancing. This means two things: 1) It allows movable allocations to be migrated into it in the case of a CMA imbalance (too much free memory in CMA pageblocks as compared to other pageblocks). 2) It is allocated top-down, so that compaction will end up migrating pages into it. Doing this will make sure that compaction doesn't aggravate a CMA imbalance, and that it won't fight with CMA balance migration from non-CMA to CMA.
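To make the placement rule concrete, here is a small illustrative sketch (not part of the patch; the numbers and the bare `flags` variable are assumed) of where the reserved region ends up inside a candidate memblock range, mirroring the memblock_reserve() placement change in the hunk below:

	/*
	 * Illustration only: a 2G candidate range at 0x100000000 with
	 * 1G still left to reserve.
	 */
	unsigned long long base  = 0x100000000ULL;	/* candidate range base */
	unsigned long long range = 2ULL << 30;		/* candidate range size */
	unsigned long long size  = 1ULL << 30;		/* amount to reserve    */
	unsigned long long start;

	if (flags & CMA_BALANCE)
		start = base + range - size;	/* 0x140000000: top of the range    */
	else
		start = base;			/* 0x100000000: bottom of the range */

Placing a CMA_BALANCE area at the high end matters because compaction's free page scanner works from the top of the zone downwards, so regular compaction tends to move data towards high pfns, and therefore into such an area rather than out of it.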
Signed-off-by: Frank van der Linden --- include/linux/cma.h | 4 +++- mm/cma.c | 33 ++++++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/include/linux/cma.h b/include/linux/cma.h index 0504580d61d0..6e98a516b336 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -26,6 +26,7 @@ enum cma_flags { __CMA_ZONES_INVALID, __CMA_ACTIVATED, __CMA_FIXED, + __CMA_BALANCE, }; =20 #define CMA_RESERVE_PAGES_ON_ERROR BIT(__CMA_RESERVE_PAGES_ON_ERROR) @@ -33,8 +34,9 @@ enum cma_flags { #define CMA_ZONES_INVALID BIT(__CMA_ZONES_INVALID) #define CMA_ACTIVATED BIT(__CMA_ACTIVATED) #define CMA_FIXED BIT(__CMA_FIXED) +#define CMA_BALANCE BIT(__CMA_BALANCE) =20 -#define CMA_INIT_FLAGS (CMA_FIXED|CMA_RESERVE_PAGES_ON_ERROR) +#define CMA_INIT_FLAGS (CMA_FIXED|CMA_RESERVE_PAGES_ON_ERROR|CMA_BALANCE) =20 struct cma; struct zone; diff --git a/mm/cma.c b/mm/cma.c index 53cb1833407b..6050d57f3c2e 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -272,6 +272,9 @@ static bool cma_next_free_range(struct cma_memrange *cm= r, static inline bool cma_should_balance_range(struct zone *zone, struct cma_memrange *cmr) { + if (!(cmr->cma->flags & CMA_BALANCE)) + return false; + if (page_zone(pfn_to_page(cmr->base_pfn)) !=3D zone) return false; =20 @@ -527,6 +530,12 @@ static bool __init basecmp(struct cma_init_memrange *m= lp, return mlp->base < mrp->base; } =20 +static bool __init revbasecmp(struct cma_init_memrange *mlp, + struct cma_init_memrange *mrp) +{ + return mlp->base > mrp->base; +} + /* * Helper function to create sorted lists. */ @@ -575,7 +584,8 @@ static int __init cma_fixed_reserve(phys_addr_t base, p= hys_addr_t size) } =20 static phys_addr_t __init cma_alloc_mem(phys_addr_t base, phys_addr_t size, - phys_addr_t align, phys_addr_t limit, int nid) + phys_addr_t align, phys_addr_t limit, int nid, + unsigned long flags) { phys_addr_t addr =3D 0; =20 @@ -588,7 +598,8 @@ static phys_addr_t __init cma_alloc_mem(phys_addr_t bas= e, phys_addr_t size, * like DMA/DMA32. */ #ifdef CONFIG_PHYS_ADDR_T_64BIT - if (!memblock_bottom_up() && limit >=3D SZ_4G + size) { + if (!(flags & CMA_BALANCE) && !memblock_bottom_up() + && limit >=3D SZ_4G + size) { memblock_set_bottom_up(true); addr =3D memblock_alloc_range_nid(size, align, SZ_4G, limit, nid, true); @@ -695,7 +706,7 @@ static int __init __cma_declare_contiguous_nid(phys_add= r_t *basep, if (ret) return ret; } else { - base =3D cma_alloc_mem(base, size, alignment, limit, nid); + base =3D cma_alloc_mem(base, size, alignment, limit, nid, flags); if (!base) return -ENOMEM; =20 @@ -851,7 +862,10 @@ int __init cma_declare_contiguous_multi(phys_addr_t to= tal_size, list_for_each_safe(mp, next, &ranges) { mlp =3D list_entry(mp, struct cma_init_memrange, list); list_del(mp); - list_insert_sorted(&final_ranges, mlp, basecmp); + if (flags & CMA_BALANCE) + list_insert_sorted(&final_ranges, mlp, revbasecmp); + else + list_insert_sorted(&final_ranges, mlp, basecmp); sizesum +=3D mlp->size; if (sizesum >=3D total_size) break; @@ -866,7 +880,12 @@ int __init cma_declare_contiguous_multi(phys_addr_t to= tal_size, list_for_each(mp, &final_ranges) { mlp =3D list_entry(mp, struct cma_init_memrange, list); size =3D min(sizeleft, mlp->size); - if (memblock_reserve(mlp->base, size)) { + if (flags & CMA_BALANCE) + start =3D (mlp->base + mlp->size - size); + else + start =3D mlp->base; + + if (memblock_reserve(start, size)) { /* * Unexpected error. 
Could go on to * the next one, but just abort to @@ -877,9 +896,9 @@ int __init cma_declare_contiguous_multi(phys_addr_t tot= al_size, } =20 pr_debug("created region %d: %016llx - %016llx\n", - nr, (u64)mlp->base, (u64)mlp->base + size); + nr, (u64)start, (u64)start + size); cmrp =3D &cma->ranges[nr++]; - cmrp->base_pfn =3D PHYS_PFN(mlp->base); + cmrp->base_pfn =3D PHYS_PFN(start); cmrp->early_pfn =3D cmrp->base_pfn; cmrp->count =3D size >> PAGE_SHIFT; cmrp->cma =3D cma; --=20 2.51.0.384.g4c02a37b29-goog From nobody Thu Oct 2 14:22:15 2025 Received: from mail-pf1-f201.google.com (mail-pf1-f201.google.com [209.85.210.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D835E28C5DE for ; Mon, 15 Sep 2025 19:52:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965951; cv=none; b=ByOC1d+Mk2QOaA+QTiRVzZQL3hxBIuNoN6XJEJ8F3/bkkDiXfDvHJdAM/0TZ3fNFyCJ+lgNiZoUg3NGhQPu1PXVkrrbna4S6LvyEQxJv9M0NKnv27H9H+LoljDiAJR9gLu43o3+rgPCxuNPlML/HVKCLQWoapxVXDGyCpaL6Xgk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965951; c=relaxed/simple; bh=RhVlmsWTYqoH5W1St65CGuBNh9719pQ0bYzXCGJTNEk=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Cc:Content-Type; b=VP7RAcg1uphxB/pCi56NUl8/go/JW0nksn4OugL7TpFAhd3XH3xDs0Id+uzpO8sGxvEdBOzi6mu130LD4qfUO5V2mC3jM57uSUL2O04ISi0DrNZaKmpItIn3ocYvKO2GEuluxBrAWICmoIvte1YCAX/PL1yG1L+Mq7eWr8wpW3o= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=B8uFDZ6B; arc=none smtp.client-ip=209.85.210.201 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="B8uFDZ6B" Received: by mail-pf1-f201.google.com with SMTP id d2e1a72fcca58-77422201fd8so3846090b3a.1 for ; Mon, 15 Sep 2025 12:52:29 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1757965949; x=1758570749; darn=vger.kernel.org; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=0svEd+gVOCVsfCPBdmnwBog7VppNcoJfmCv6ADiIbr8=; b=B8uFDZ6BNplEqvOGZx4qKLGCW/At/gjmtg4v7i6rtXfuDEjBjYiUB4GIXkOhDQOzSc XEe0soQRuHiQFfYKaWPZqrPjdkETAYstIyPBFdWrbPe5qwEw3gkjJOVVuezSOauuJeSl f+02XYIJywWqCFOQXEq9tgXBL0DbuazceY5fbb4sJvdyTlxb5uVc9bt4XyYADPiSeXSz HzKfKm0XIE86tk2umfO1arWApAyNvhTDHGgkKfRKvGe2/KhC4aa4uxQN89s9SIjllSUd LSeM4UT/kBvvW0JJCqPFD0EA48INEXS3aENiq2FcQsHu9hfMjR8wOArNt746+e30vBod jaeQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1757965949; x=1758570749; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=0svEd+gVOCVsfCPBdmnwBog7VppNcoJfmCv6ADiIbr8=; b=dYHu1wN5c7LQCK0pE7d5O5q23ehJz7bZoLsN1zb9J1KEUAgTWgkvdZ3eV4DjJL4t4g h7BNkwn+clw152HL31nk7kQI8ET2WgYQTQRfPWLzYjrv+VuPAEBdwOsbGSdvug00CcmR eRijEGQpGWwZS71JZWHyvBTJBcpdf4WddDTkl/tD2GiIozfIwZ4YIOgWArIPuDVidiiU 
zP/pJ9kCwU6jn1anqoGJ1KIrNKbXkSf3QYhn2t6ikHrGFhW14ZaXhvTvk9EcapramAr6 C42PCj01MF9+Mrp9ohb/LrKJ0h2IYB7nqAa2J6/Lrgzh4JuxERifJwOOI+5HZRCoflyU +/Ig== X-Forwarded-Encrypted: i=1; AJvYcCVTkRTs690Rxq6hxTxga9DFoXbaMrq0wHRknxbYeDldYcnq2vxa0l2uWxgqTBjrYhnv2EmC+r8NmJrnd8k=@vger.kernel.org X-Gm-Message-State: AOJu0Yw9etJOEI9rLtWw+W04czlUzBIlmuBnS7zAf4wbcMnha8kvcEXH u7EOn5wLbByEYl8cy7CP103R4LPAaN8BkEabGxZWRypBOHeHHL6ZAvVSLMAjvyrCFPiEfhrsWg= = X-Google-Smtp-Source: AGHT+IGSSnGP60FTHWub2ijESGSasTj/5U4SKgrsznRVHg7jm6k4jw2NVaDNXlZHD1qi1VzPFvCEZCsL X-Received: from pjbhl16.prod.google.com ([2002:a17:90b:1350:b0:32d:a0b1:2b03]) (user=fvdl job=prod-delivery.src-stubby-dispatcher) by 2002:a05:6a20:6705:b0:24c:cb06:f0db with SMTP id adf61e73a8af0-2602bb594afmr12621549637.35.1757965949116; Mon, 15 Sep 2025 12:52:29 -0700 (PDT) Date: Mon, 15 Sep 2025 19:51:48 +0000 In-Reply-To: <20250915195153.462039-1-fvdl@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250915195153.462039-1-fvdl@google.com> X-Mailer: git-send-email 2.51.0.384.g4c02a37b29-goog Message-ID: <20250915195153.462039-8-fvdl@google.com> Subject: [RFC PATCH 07/12] mm/compaction: optionally use a different isolate function From: Frank van der Linden To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org, linux-kernel@vger.kernel.org Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev, Frank van der Linden Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" For CMA balancing, the compaction migration hooks can largely be reused, except a different function to isolate free pages is needed. So, add a pointer to the isolation function in the compact_control structure. If it's not NULL, use it, else use isolate_freepages as usual. No functional change. Signed-off-by: Frank van der Linden Acked-by: Rik van Riel --- mm/compaction.c | 5 ++++- mm/internal.h | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index bf021b31c7ec..6a2c06e356c5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1813,7 +1813,10 @@ static struct folio *compaction_alloc_noprof(struct = folio *src, unsigned long da if (start_order =3D=3D NR_PAGE_ORDERS) { if (has_isolated_pages) return NULL; - isolate_freepages(cc); + if (cc->isolate_freepages) + cc->isolate_freepages(cc); + else + isolate_freepages(cc); has_isolated_pages =3D true; goto again; } diff --git a/mm/internal.h b/mm/internal.h index 45b725c3dc03..7916d8be8922 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -873,6 +873,11 @@ struct compact_control { unsigned int nr_freepages; /* Number of isolated free pages */ unsigned int nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ + /* + * Function to use to isolate free pages, if applicable. If NULL, + * default to isolate_freepages(). + */ + void (*isolate_freepages)(struct compact_control *cc); /* * Acts as an in/out parameter to page isolation for migration. * isolate_migratepages uses it as a search base. 
--=20 2.51.0.384.g4c02a37b29-goog From nobody Thu Oct 2 14:22:15 2025 Received: from mail-pl1-f201.google.com (mail-pl1-f201.google.com [209.85.214.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6430D28D8D0 for ; Mon, 15 Sep 2025 19:52:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.214.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965953; cv=none; b=fCezARQngFUSYDRIfhtsI4Gfi58FS5JdCjjTZDLvMO0sRbE2mWHm6viWjWhnMi3Sw1D7WXWHgSdABqFN5iqwaJjeZ8zzm6rmKrvpk2KXf9kGGhToVpeNI3FbsRHsBa0PglNFR63MyAuQRLe+npcydYlMkJAKr5pwAfloQaBfDSo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965953; c=relaxed/simple; bh=LlIK5l7n4robJLODbHBAVI4BsoX7LlGnbeStJBZkhzw=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Cc:Content-Type; b=l9sVA0/NOd2CjNO4FJyqdIm7TfkrPecpF9UT8E4SE715zz1Q/+CGF1mGG9lWGu8tOtN9jARZw7KP7oWZSDghXEcYUy/42ed78OgPlC+iPT6OuyoTGPAltIo/nOuhIk6V7+pDrghSpmIzbi+jU5J/7meRHzP92GYQWAfIJO3dG5U= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=dwEv+7/F; arc=none smtp.client-ip=209.85.214.201 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="dwEv+7/F" Received: by mail-pl1-f201.google.com with SMTP id d9443c01a7336-2621fab9befso22082475ad.2 for ; Mon, 15 Sep 2025 12:52:31 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1757965951; x=1758570751; darn=vger.kernel.org; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=RZKjZrunq5am07txBDDkuCSvylPNhhpycrLLY5C5zoU=; b=dwEv+7/FCgTJYrbC/AQ2sq+12KI7crGkV5SR0iyq39ixFcU0PW4smElbELsPAtdtcl j1Rfl7BzKhbBO8yz1Zin18kqrA2hLftZRZ8qm4Z49NDo8vcHcfhjMTDM1QOj1dac4LXG TnGbgUeF8+aoGVfGfWV8We7LSQ7MMQ5ys4ocY2iDLAOei3myHGcAQo1sLhXE10ldolVQ Z+F03/mgyhW2xoIOY/J4tLpmrIc1D95rBuj5WEwcvdRufKekFpjd0wZvek52jkFMXvZQ lZltiWJAIoD9HFy7oCXpHtDshJ0s/Pzvvl6OKKcmvt+pyQ09fZ4Z91rdmCxAaOWYZUFe svwg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1757965951; x=1758570751; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=RZKjZrunq5am07txBDDkuCSvylPNhhpycrLLY5C5zoU=; b=RpmhmCr4fEyc1qBM9xxRVxANhY7KpUXWks9QmF5egxo6SGg7aM7Wxmxp6frc/Vcpjg FMvXnUwF7zpsRRYce8JOBwzRYNEz0PEWKGj+3gKAJ6zFvwp/V5PFZMNrDgry7Cv5zTfg 1p5xRcwLjYdhyJpeAL5D3rOV40q94fNRGWaTQoEd8I281LkGmgyy5itKfWe/ZcyWaW6I uhh/LBnh+0Twdm5XAnQpQaQhjqsmwUdlKkIXmPIE+covUcBTllb0jGeI9iilbXOWsCOS vmSiSwnHODLFr/dgfgqXFf+rPQljdj6Wtr1/WcJB2ROtmC/KLQWP+95jNCR8cy3eS+Pm jehg== X-Forwarded-Encrypted: i=1; AJvYcCX3jZ02TxRYHW4Jhy6myhRlJI+/Rzh1WnzRZMcR6fKUsAmpp6W1e/XAw9Aoff57Zg8jpxujVOKWw0UCBk4=@vger.kernel.org X-Gm-Message-State: AOJu0YzkBnlpYHr0Xjfa52UMCyVklrc/TNpUDHh3iMCUTSfztCl+wU8G PMVzbXV2xCAZzv9YmAJ25aC/VMTzMdCX0DTUfWfEM0XcfZEi6xIBiw3ljwhfXar4ym0jWqI4+g= = X-Google-Smtp-Source: 
AGHT+IH9QOqt/yj9ycFYjOQ2rL3lizwBlj25FoMjJ5FB/agDQXZSFKAbBpu7CnQQ54uijVVe5BYAlfIL X-Received: from plgi11.prod.google.com ([2002:a17:902:cf0b:b0:24a:ff7a:4c65]) (user=fvdl job=prod-delivery.src-stubby-dispatcher) by 2002:a17:902:e78b:b0:25a:324a:9af1 with SMTP id d9443c01a7336-25d26e42154mr150527385ad.38.1757965950699; Mon, 15 Sep 2025 12:52:30 -0700 (PDT) Date: Mon, 15 Sep 2025 19:51:49 +0000 In-Reply-To: <20250915195153.462039-1-fvdl@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250915195153.462039-1-fvdl@google.com> X-Mailer: git-send-email 2.51.0.384.g4c02a37b29-goog Message-ID: <20250915195153.462039-9-fvdl@google.com> Subject: [RFC PATCH 08/12] mm/compaction: simplify isolation order checks a bit From: Frank van der Linden To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org, linux-kernel@vger.kernel.org Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev, Frank van der Linden Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The code to isolate pages for migration always checked both cc->alloc_contig and skip_isolation_on_order to determine whether a page could be isolated for migration. Simplify this a little bit by moving the cc->alloc_contig check in to skip_isolation_on_order. Also rename alloc_contig to migrate_large, since there will be an additional user (CMA balancing) of this field soon, not just alloc_contig_range. No functional change. Signed-off-by: Frank van der Linden Acked-by: Rik van Riel --- mm/compaction.c | 26 ++++++++++++++------------ mm/internal.h | 2 +- mm/page_alloc.c | 2 +- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 6a2c06e356c5..2e6c30f50b89 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -793,13 +793,15 @@ static bool too_many_isolated(struct compact_control = *cc) /** * skip_isolation_on_order() - determine when to skip folio isolation base= d on * folio order and compaction target order + * @cc: compact control structure containing target order * @order: to-be-isolated folio order - * @target_order: compaction target order * * This avoids unnecessary folio isolations during compaction. */ -static bool skip_isolation_on_order(int order, int target_order) +static bool skip_isolation_on_order(struct compact_control *cc, int order) { + if (cc->migrate_large) + return false; /* * Unless we are performing global compaction (i.e., * is_via_compact_memory), skip any folios that are larger than the @@ -807,7 +809,7 @@ static bool skip_isolation_on_order(int order, int targ= et_order) * the desired target_order, so migrating this folio would likely fail * later. */ - if (!is_via_compact_memory(target_order) && order >=3D target_order) + if (!is_via_compact_memory(cc->order) && order >=3D cc->order) return true; /* * We limit memory compaction to pageblocks and won't try @@ -850,6 +852,7 @@ isolate_migratepages_block(struct compact_control *cc, = unsigned long low_pfn, unsigned long next_skip_pfn =3D 0; bool skip_updated =3D false; int ret =3D 0; + unsigned int order; =20 cc->migrate_pfn =3D low_pfn; =20 @@ -948,13 +951,13 @@ isolate_migratepages_block(struct compact_control *cc= , unsigned long low_pfn, } =20 if (PageHuge(page)) { - const unsigned int order =3D compound_order(page); /* * skip hugetlbfs if we are not compacting for pages * bigger than its order. THPs and other compound pages * are handled below. 
*/ - if (!cc->alloc_contig) { + if (!cc->migrate_large) { + order =3D compound_order(page); =20 if (order <=3D MAX_PAGE_ORDER) { low_pfn +=3D (1UL << order) - 1; @@ -962,7 +965,7 @@ isolate_migratepages_block(struct compact_control *cc, = unsigned long low_pfn, } goto isolate_fail; } - /* for alloc_contig case */ + /* for migrate_large case */ if (locked) { unlock_page_lruvec_irqrestore(locked, flags); locked =3D NULL; @@ -1030,11 +1033,11 @@ isolate_migratepages_block(struct compact_control *= cc, unsigned long low_pfn, * skip them at once. The check is racy, but we can consider * only valid values and the only danger is skipping too much. */ - if (PageCompound(page) && !cc->alloc_contig) { - const unsigned int order =3D compound_order(page); + if (PageCompound(page)) { + order =3D compound_order(page); =20 /* Skip based on page order and compaction target order. */ - if (skip_isolation_on_order(order, cc->order)) { + if (skip_isolation_on_order(cc, order)) { if (order <=3D MAX_PAGE_ORDER) { low_pfn +=3D (1UL << order) - 1; nr_scanned +=3D (1UL << order) - 1; @@ -1182,9 +1185,8 @@ isolate_migratepages_block(struct compact_control *cc= , unsigned long low_pfn, /* * Check LRU folio order under the lock */ - if (unlikely(skip_isolation_on_order(folio_order(folio), - cc->order) && - !cc->alloc_contig)) { + order =3D folio_order(folio); + if (unlikely(skip_isolation_on_order(cc, order))) { low_pfn +=3D folio_nr_pages(folio) - 1; nr_scanned +=3D folio_nr_pages(folio) - 1; folio_set_lru(folio); diff --git a/mm/internal.h b/mm/internal.h index 7916d8be8922..ffcb3aec05ed 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -909,7 +909,7 @@ struct compact_control { * isolation or migration failures to * ensure forward progress. */ - bool alloc_contig; /* alloc_contig_range allocation */ + bool migrate_large; /* Always migrate large/huge pages */ }; =20 /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d3966d31c039..dc59aaa63ae6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6903,7 +6903,7 @@ int alloc_contig_range_noprof(unsigned long start, un= signed long end, .mode =3D MIGRATE_SYNC, .ignore_skip_hint =3D true, .no_set_skip_hint =3D true, - .alloc_contig =3D true, + .migrate_large =3D true, }; INIT_LIST_HEAD(&cc.migratepages); enum pb_isolate_mode mode =3D (alloc_flags & ACR_FLAGS_CMA) ? 
--=20 2.51.0.384.g4c02a37b29-goog From nobody Thu Oct 2 14:22:15 2025 Received: from mail-pl1-f202.google.com (mail-pl1-f202.google.com [209.85.214.202]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E8D4328751F for ; Mon, 15 Sep 2025 19:52:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.214.202 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965954; cv=none; b=HxO5sZjMqbNC9xRQx3sooE6uT2fK7hC/aHRdKIXwftrZqd/ozHA7REcvRoym6bfPx4Wlj5V2Lvm1VA8wgUPfbfD81A8AwBVUq7vepdllg7OsRErET1Wajh+qCKtDH4ez2LM/gpEXo4ZaiqEJOmanVTSRJ6JF7v/UcQc6D56CKyE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965954; c=relaxed/simple; bh=E/S4wLBAzx3NQ/Hxx3Nf5H4FA79Ki+KpoQsmHrveUyg=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Cc:Content-Type; b=VUyVuY+aI4w6eelY8nNo5P4mM5Zh8S+jE5CWEbBGfeya3vHwwhmqxfCDTfg3EfNWpf6mtz2jiPDCp5OKJgCGIPlvM2o+3IZa6bFxh4rmbthoX69rwAvL0naSprkPRkt3Dl+F7/wIY7+OOj840T+NjiPCTgwen0enZZ92kCWQIJs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=xL1W9xiT; arc=none smtp.client-ip=209.85.214.202 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="xL1W9xiT" Received: by mail-pl1-f202.google.com with SMTP id d9443c01a7336-244582bc5e4so55267475ad.2 for ; Mon, 15 Sep 2025 12:52:32 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1757965952; x=1758570752; darn=vger.kernel.org; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=5DjbF4KpmP4NywIoDsroRavthjlO6MqxRwXYsW+YSfA=; b=xL1W9xiTW7CB5hNfl9ll2/sM1zVleuI4v4W7Ih+Q5FPPwjGM+p9jK+VRbyz5u8UKkM 2NgDQPMSJS4QFrUkQmggsNOHVogQgSYiSjXq/BO/di5jdvLYolOSTK80DVpp9xwTQ3qd miyqeBPB9gnU6iAkOnx/96s/dLJpKG0Ny2GAT97eNrasBp1Z2S59jwmcGHDzllaascqH 6Qz+M5cqI6ZnTlGks2u6LeobsWAyk3FMW1a/XpZSPP2BYEUW5Nnim4Z+FSF4GHNNCDRS ZYl3BmmsZWtfsIbDDLlmMx2nJQ4mUG5Jj6eEnc9T+cejnzIqNmGY3e/1QUs/9iVEEDTs tcJg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1757965952; x=1758570752; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=5DjbF4KpmP4NywIoDsroRavthjlO6MqxRwXYsW+YSfA=; b=X8711CGpUbkVKDxkNJ73SUrDhg1XkyxWlbkGiN7VKkeGUJFsPtA+14Pt+3MlbbckS5 Rucy++eIum4vROsYtk/2+sr5V7Se86c0qPcJ7Nb3DOS7SdF3L89ODCZfOkbOnsi9LSUF EerXGg407pLWBB47yNzXqZex/kW6moO3MkSiGEDL4xQLu+q7z2TuA5SEKrd+HjcovEU0 uPxtlNwjz7bOGyJMmGzYQELkXWqtkWD6hMoeF/Xo6yjHCWpOBkMVNJBJI4p0u7B/LXxg rphPN+Iia/bZhxVkI4PzGdTYhfCbv3q6wns5Bxt8VLtGbxFXaRobKqEstoFoHvmP58mP ECTg== X-Forwarded-Encrypted: i=1; AJvYcCVRe+V26gMkn/rgN5L0PVuJ75MtUGzJRvah1u52Y8lFqp+SWWQGItLvx3SrPJrjqIaS/ywaqEeVF1xYaYY=@vger.kernel.org X-Gm-Message-State: AOJu0YyOVJAT3/eGULCfDwd6q3Uwh7KrcVLkzWZ2g3Y3mym+ULUSsa4C 0TyYj3CZmJBmMntYwJqY/RDhHZhbB2nQDM4tL9RjEan7sjb+Tgqj2M7DjZc4hBqM6Ud+UI1ssg= = X-Google-Smtp-Source: 
AGHT+IF8O+VLtGgAGnPoMFPwPtML+wPGmo10/Gb9GlzY+4KfLm4QkQw7DA2FoDyUrRDSv5yLbXqWtzOk X-Received: from plbiz4.prod.google.com ([2002:a17:902:ef84:b0:24c:863e:86a3]) (user=fvdl job=prod-delivery.src-stubby-dispatcher) by 2002:a17:902:dacd:b0:25d:8043:781d with SMTP id d9443c01a7336-25d804381bcmr162786945ad.21.1757965952236; Mon, 15 Sep 2025 12:52:32 -0700 (PDT) Date: Mon, 15 Sep 2025 19:51:50 +0000 In-Reply-To: <20250915195153.462039-1-fvdl@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250915195153.462039-1-fvdl@google.com> X-Mailer: git-send-email 2.51.0.384.g4c02a37b29-goog Message-ID: <20250915195153.462039-10-fvdl@google.com> Subject: [RFC PATCH 09/12] mm/cma: introduce CMA balancing From: Frank van der Linden To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org, linux-kernel@vger.kernel.org Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev, Frank van der Linden Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" A longstanding problem with having a lot of CMA pageblocks in the system (through hugetlb_cma) is that this limits the amount of memory that the kernel can use for its allocations. Kernel allocations are unmovable and cannot come from CMA pageblocks. This can lead to situations where kernel allocations cause OOMs, when in fact there might still be enough memory available. There isn't much that can be done if the non-CMA part of memory is already taken up by unmovable allocations. That scenario can be considered a misconfigured system. But if there are movable allocations in the non-CMA areas, they are unnecessarily taking away space from the kernel. Currently, the page allocator tries to avoid this scenario by allocating from CMA first if more than half of the free pages in a zone come from CMA. But that's not a guarantee. For example, take the case where a lot of memory is being taken up by 1G hugetlb pages, allocated from hugetlb_cma, and the hugetlb_cma area has been fully used by hugetlbfs. This means that new movable allocations will land in the non-CMA part of memory, and that the kernel may come under memory pressure. If those allocations are long-lasting, freeing up hugetlb pages will not reduce that pressure, since the kernel can't use the new space, and the long-lasting allocations residing in non-CMA memory will stay put. To counter this issue, introduce interfaces to explicitly move pages into CMA areas. The number of pages moved depends on cma_first_limit. It will use that percentage to calculate the target number of pages that should be moved. A later commit will call one of these interfaces to move pages to CMA if needed, after CMA-allocated hugetlb pages have been freed.
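To illustrate the sizing with made-up numbers (the formula is the one used by balance_zone_cma() in the diff below): with cma_first_limit set to 50, a zone that has 8 GB of free pages, of which 6 GB are free CMA pages, gets a target of 50% of 8 GB = 4 GB of free CMA; the balancer therefore tries to migrate up to 6 GB - 4 GB = 2 GB of movable data out of non-CMA pageblocks and into CMA pageblocks. That amount is further capped by the free space in the targeted CMA area (when one is given) and by the number of allocated non-CMA pages, since no more than that could usefully be moved.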
Signed-off-by: Frank van der Linden Reviewed-by: Rik van Riel --- include/linux/migrate_mode.h | 1 + include/trace/events/migrate.h | 3 +- mm/compaction.c | 168 +++++++++++++++++++++++++++++++++ mm/internal.h | 4 + 4 files changed, 175 insertions(+), 1 deletion(-) diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h index 265c4328b36a..3e235499cd73 100644 --- a/include/linux/migrate_mode.h +++ b/include/linux/migrate_mode.h @@ -25,6 +25,7 @@ enum migrate_reason { MR_LONGTERM_PIN, MR_DEMOTION, MR_DAMON, + MR_CMA_BALANCE, MR_TYPES }; =20 diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index cd01dd7b3640..53d669ee26be 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -23,7 +23,8 @@ EM( MR_CONTIG_RANGE, "contig_range") \ EM( MR_LONGTERM_PIN, "longterm_pin") \ EM( MR_DEMOTION, "demotion") \ - EMe(MR_DAMON, "damon") + EM( MR_DAMON, "damon") \ + EMe(MR_CMA_BALANCE, "cma_balance") =20 /* * First define the enums in the above macros to be exported to userspace diff --git a/mm/compaction.c b/mm/compaction.c index 2e6c30f50b89..3200119b8baf 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "internal.h" =20 #ifdef CONFIG_COMPACTION @@ -2512,6 +2513,173 @@ compaction_suit_allocation_order(struct zone *zone,= unsigned int order, return COMPACT_CONTINUE; } =20 +#ifdef CONFIG_CMA + +static void +isolate_free_cma_pages(struct compact_control *cc) +{ + unsigned long end_pfn, pfn, next_pfn, start_pfn; + int i; + + i =3D -1; + end_pfn =3D 0; + + next_pfn =3D end_pfn =3D cc->free_pfn; + start_pfn =3D 0; + while (cc->nr_freepages < cc->nr_migratepages) { + if (!cma_next_balance_pagerange(cc->zone, cc->cma, &i, + &start_pfn, &end_pfn)) + break; + for (pfn =3D start_pfn; pfn < end_pfn; pfn =3D next_pfn) { + next_pfn =3D pfn + pageblock_nr_pages; + isolate_freepages_block(cc, &pfn, next_pfn, + cc->freepages, 1, false); + if (cc->nr_freepages >=3D cc->nr_migratepages) + break; + } + } + cc->free_pfn =3D next_pfn; +} + +static void balance_zone_cma(struct zone *zone, struct cma *cma) +{ + struct compact_control cc =3D { + .zone =3D zone, + .cma =3D cma, + .isolate_freepages =3D isolate_free_cma_pages, + .nr_migratepages =3D 0, + .nr_freepages =3D 0, + .free_pfn =3D 0, + .migrate_pfn =3D 0, + .mode =3D MIGRATE_SYNC, + .ignore_skip_hint =3D true, + .no_set_skip_hint =3D true, + .gfp_mask =3D GFP_KERNEL, + .migrate_large =3D true, + .order =3D -1, + }; + unsigned long nr_pages; + int order; + unsigned long free_cma, free_pages, allocated, allocated_noncma; + unsigned long target_free_cma; + int rindex, ret =3D 0, n; + unsigned long start_pfn, end_pfn, pfn, next_pfn; + long nr_migrated; + + if (zone_idx(zone) =3D=3D ZONE_MOVABLE) + return; + + if (!cma && !cma_numranges()) + return; + + /* + * Try to move allocated pages from non-CMA pageblocks + * to CMA pageblocks (possibly in a specific CMA area), to + * give the kernel more space for unmovable allocations. + * + * cma_first_limit, the percentage of free pages that are + * MIGRATE_CMA, is used to calculcate the target number. + */ + free_pages =3D zone_page_state(zone, NR_FREE_PAGES); + free_cma =3D zone_page_state(zone, NR_FREE_CMA_PAGES); + if (!free_cma) + return; + + target_free_cma =3D (cma_first_limit * free_pages) / 100; + /* + * If we're already below the target, nothing to do. 
+ */ + if (free_cma <=3D target_free_cma) + return; + + /* + * To try to avoid scanning too much non-CMA memory, + * set the upper bound of pages we want to migrate + * to the minimum of: + * 1. The number of MIGRATE_CMA pages we want to use. + * 2. The space available in the targeted CMA area (if any). + * 3. The number of used non-CMA pages. + * + * This will still likely cause the scanning of more + * pageblocks than is strictly needed, but it's the best + * that can be done without explicit tracking of the number + * of movable allocations in non-CMA memory. + */ + allocated =3D zone_managed_pages(zone) - free_pages; + allocated_noncma =3D allocated - (zone_cma_pages(zone) - free_cma); + + nr_pages =3D free_cma - target_free_cma; + if (cma) + nr_pages =3D min(nr_pages, cma_get_available(cma)); + nr_pages =3D min(allocated_noncma, nr_pages); + + for (order =3D 0; order < NR_PAGE_ORDERS; order++) + INIT_LIST_HEAD(&cc.freepages[order]); + INIT_LIST_HEAD(&cc.migratepages); + + rindex =3D -1; + start_pfn =3D next_pfn =3D end_pfn =3D 0; + nr_migrated =3D 0; + while (nr_pages > 0) { + ret =3D 0; + if (!cma_next_noncma_pagerange(cc.zone, &rindex, + &start_pfn, &end_pfn)) + break; + + for (pfn =3D start_pfn; pfn < end_pfn; pfn =3D next_pfn) { + next_pfn =3D pfn + pageblock_nr_pages; + cc.nr_migratepages =3D 0; + + if (!pageblock_pfn_to_page(pfn, next_pfn, zone)) + continue; + + ret =3D isolate_migratepages_block(&cc, pfn, next_pfn, + ISOLATE_UNEVICTABLE); + if (ret) + continue; + ret =3D migrate_pages(&cc.migratepages, compaction_alloc, + compaction_free, (unsigned long)&cc, + cc.mode, MR_CMA_BALANCE, &n); + if (ret) + putback_movable_pages(&cc.migratepages); + nr_migrated +=3D n; + if (nr_migrated >=3D nr_pages) + break; + } + + nr_pages -=3D min_t(unsigned long, nr_migrated, nr_pages); + } + + if (cc.nr_freepages > 0) + release_free_list(cc.freepages); +} + +void balance_node_cma(int nid, struct cma *cma) +{ + pg_data_t *pgdat; + int zoneid; + struct zone *zone; + + if (!cma && !cma_numranges()) + return; + + if (nid >=3D MAX_NUMNODES || !node_online(nid)) + return; + + pgdat =3D NODE_DATA(nid); + + for (zoneid =3D 0; zoneid < MAX_NR_ZONES; zoneid++) { + + zone =3D &pgdat->node_zones[zoneid]; + if (!populated_zone(zone)) + continue; + + balance_zone_cma(zone, cma); + } +} + +#endif /* CONFIG_CMA */ + static enum compact_result compact_zone(struct compact_control *cc, struct capture_control *capc) { diff --git a/mm/internal.h b/mm/internal.h index ffcb3aec05ed..7dcaf7214683 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -857,6 +857,8 @@ void memmap_init_range(unsigned long, int, unsigned lon= g, unsigned long, =20 #if defined CONFIG_COMPACTION || defined CONFIG_CMA =20 +struct cma; + /* * in mm/compaction.c */ @@ -887,6 +889,7 @@ struct compact_control { unsigned long migrate_pfn; unsigned long fast_start_pfn; /* a pfn to start linear scan from */ struct zone *zone; + struct cma *cma; /* if moving to a specific CMA area */ unsigned long total_migrate_scanned; unsigned long total_free_scanned; unsigned short fast_search_fail;/* failures to use free list searches */ @@ -938,6 +941,7 @@ struct cma; #ifdef CONFIG_CMA void *cma_reserve_early(struct cma *cma, unsigned long size); void init_cma_pageblock(struct page *page); +void balance_node_cma(int nid, struct cma *cma); #else static inline void *cma_reserve_early(struct cma *cma, unsigned long size) { --=20 2.51.0.384.g4c02a37b29-goog From nobody Thu Oct 2 14:22:15 2025 Received: from mail-pg1-f202.google.com (mail-pg1-f202.google.com 
[209.85.215.202]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BA8CB28C5DE for ; Mon, 15 Sep 2025 19:52:34 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.215.202 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965956; cv=none; b=M4DclcDvEZ5lsvBd78QjukRaoyLDdMmCcQGNDrHJ8AKSAlBOs3ye7/mePjyAce/IfoZi/TbK7WrJW0YVrpSUZR/S+NE29xFlidT/8cdDdpfVN4P99t24B3znCs2yGFNzEd5W0nJ7n2IXds07s02E+g5Gv4tEDebEV0uXjLEIZ8E= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965956; c=relaxed/simple; bh=gb0XDeyjnwh5CMarnGIWP/LYrRFNSf8Mf19XOje2tfE=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Cc:Content-Type; b=hJstyFDBkWFP9rN+13NFKo2NPzm+/k0t8NS2gL0Hq1CRHj0FFyvXBCQZSGp9S5YfkOIqONrQp7+gAXsg9IAQkt/4E3kr+fRrnd9pM2b4yxlap3kduCTOp12uhQv8YdmC3EQMR8/pKx7kgnbYsUU/TGh/6hc7G5Kq7qkzQoUKLuQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=Me5i/rrw; arc=none smtp.client-ip=209.85.215.202 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="Me5i/rrw" Received: by mail-pg1-f202.google.com with SMTP id 41be03b00d2f7-b4f87c691a7so7285550a12.3 for ; Mon, 15 Sep 2025 12:52:34 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1757965954; x=1758570754; darn=vger.kernel.org; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=2MjOcVaMGZCYXnwmJQZ57Yx+f6ZupaH/8SoD2+GME4k=; b=Me5i/rrwA6TXI6P7eAbSOvlWuCHZjptlnXd8vVmEVYntTwyjmuzzBHXB0BAnM+PrCv 3cCte+Yw7dMS8PifLG6IZug/VlKUCuAU970di78+OteRS56aqh8meir21ici78+81Yeo g08raB2K+0f4FVDlHPZmNepMEoVFd9+a7x/NIHRPGn3A6uZOUQn5FFIbDvv5nWVzQpDH TU79u1EqSg4rIvXUP36GAOMR1JvEB+UkvAP1GUUKXMaGcWXJeAi587d1JSbIpbYRa3qZ z82PTViy4DFBd5iQ9W1FOw3OIXK1K09iimdRi+K7mKxwssQu3tY6bkytgIyhWy036GOA WI5A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1757965954; x=1758570754; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=2MjOcVaMGZCYXnwmJQZ57Yx+f6ZupaH/8SoD2+GME4k=; b=FbGPikzPZXOfeeuY8L91YpoXxE+4WFUMa/gcOZOqWoA3GAhTypmzkyZ8LLsHbjGmGP msNd3PGezXwIv99tyowLzNScjVLSzkwMahsfrmCwfXL4PB0gEf5F+dhLP3qxQBWsMLGV wpzDCSOcr678BSM1Vbf3T3W0mR5WYeZfdE381WnwwaaWCRrZlLbJ2rsnCMWqKdoBXdNX BOiTgVlfPJMRpYAA0sCIuZoZkFo+jzmtFjbMD2bWIpCsKSGi38bZmzdUQBuLvWhdjStd UVMA95In3DPsxGIQ83mZ6twxZYxArH5PfQ1emZIiMGiqMPJuWNJlRFINy+Sl9v2tMuPh VMdQ== X-Forwarded-Encrypted: i=1; AJvYcCXbx+lPZTTrggBCulpsTHtOXQ5FjE5+AEqq7otRPZV7twpeHvstrR1gOoiB4MbzlLOVyWQN1hfqMetg4pQ=@vger.kernel.org X-Gm-Message-State: AOJu0Yys9R2E/4zvZciImFybujoNYcetr/Ke7xYfy72qDUKjJHn1gLfL ctmGWb1qPYjawWAfAfVY3XjQ9t07cmiE3pUL60F/Fbx4RTyJKOJSF8iE6WSJJfU3vB7jZMX57A= = X-Google-Smtp-Source: AGHT+IG992kJ91SBgyRztXEp8ayRWD1xxYpYbjOHc4swJ2FjbvDWoErXzOXlzWzoeDsx/LEygczXUUjX X-Received: from pjbqd5.prod.google.com 
([2002:a17:90b:3cc5:b0:32e:6866:664]) (user=fvdl job=prod-delivery.src-stubby-dispatcher) by 2002:a17:90b:4f86:b0:32e:18b2:5a45 with SMTP id 98e67ed59e1d1-32e18b25f33mr9160181a91.5.1757965953984; Mon, 15 Sep 2025 12:52:33 -0700 (PDT) Date: Mon, 15 Sep 2025 19:51:51 +0000 In-Reply-To: <20250915195153.462039-1-fvdl@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250915195153.462039-1-fvdl@google.com> X-Mailer: git-send-email 2.51.0.384.g4c02a37b29-goog Message-ID: <20250915195153.462039-11-fvdl@google.com> Subject: [RFC PATCH 10/12] mm/hugetlb: do explicit CMA balancing From: Frank van der Linden To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org, linux-kernel@vger.kernel.org Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev, Frank van der Linden Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" CMA areas are normally not very large, but HugeTLB CMA is an exception. hugetlb_cma, used for 'gigantic' pages (usually 1G), can take up many gigabytes of memory. As such, it is potentially the largest source of 'false OOM' conditions, situations where the kernel runs out of space for unmovable allocations, because it can't allocate from CMA pageblocks, and non-CMA memory has been tied up by other movable allocations. The normal use case of hugetlb_cma is a system where 1G hugetlb pages are sometimes, but not always, needed, so they need to be created and freed dynamically. As such, the best time to address CMA memory imbalances is when CMA hugetlb pages are freed, making multiples of 1G available as buddy-managed CMA pageblocks. That is a good time to check if movable allocations from non-CMA pageblocks should be moved to CMA pageblocks to give the kernel more breathing space. Do this by calling balance_node_cma on either the hugetlb CMA area for the node that just had its number of hugetlb pages reduced, or for all hugetlb CMA areas if the reduction was not node-specific. To have the CMA balancing code act on the hugetlb CMA areas, set the CMA_BALANCE flag when creating them.
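As a usage illustration (the sysfs paths below are the existing hugetlb interface, not something added by this patch): shrinking the gigantic page pool, for example by writing a smaller value to /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages, returns the freed 1G folios to the buddy allocator as free CMA pageblocks, and with this patch the free path immediately runs balance_node_cma() on the per-node hugetlb CMA areas; using the per-node file under /sys/devices/system/node/nodeN/hugepages/ instead limits the balancing pass to that node's area. This way, long-lived movable allocations can be pulled out of non-CMA memory while the freed CMA space is still available, rather than waiting for reclaim or compaction to get to it.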
Signed-off-by: Frank van der Linden --- mm/hugetlb.c | 14 ++++++++------ mm/hugetlb_cma.c | 16 ++++++++++++++++ mm/hugetlb_cma.h | 5 +++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index eed59cfb5d21..611655876f60 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3971,12 +3971,14 @@ static int set_max_huge_pages(struct hstate *h, uns= igned long count, int nid, =20 list_add(&folio->lru, &page_list); } - /* free the pages after dropping lock */ - spin_unlock_irq(&hugetlb_lock); - update_and_free_pages_bulk(h, &page_list); - flush_free_hpage_work(h); - spin_lock_irq(&hugetlb_lock); - + if (!list_empty(&page_list)) { + /* free the pages after dropping lock */ + spin_unlock_irq(&hugetlb_lock); + update_and_free_pages_bulk(h, &page_list); + flush_free_hpage_work(h); + hugetlb_cma_balance(nid); + spin_lock_irq(&hugetlb_lock); + } while (count < persistent_huge_pages(h)) { if (!adjust_pool_surplus(h, nodes_allowed, 1)) break; diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c index 71d0e9a048d4..c0396d35b5bf 100644 --- a/mm/hugetlb_cma.c +++ b/mm/hugetlb_cma.c @@ -276,3 +276,19 @@ bool __init hugetlb_early_cma(struct hstate *h) =20 return hstate_is_gigantic(h) && hugetlb_cma_only; } + +void hugetlb_cma_balance(int nid) +{ + int node; + + if (nid !=3D NUMA_NO_NODE) { + if (hugetlb_cma[nid]) + balance_node_cma(nid, hugetlb_cma[nid]); + } else { + for_each_online_node(node) { + if (hugetlb_cma[node]) + balance_node_cma(node, + hugetlb_cma[node]); + } + } +} diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h index f7d7fb9880a2..2f2a35b56d8a 100644 --- a/mm/hugetlb_cma.h +++ b/mm/hugetlb_cma.h @@ -13,6 +13,7 @@ bool hugetlb_cma_exclusive_alloc(void); unsigned long hugetlb_cma_total_size(void); void hugetlb_cma_validate_params(void); bool hugetlb_early_cma(struct hstate *h); +void hugetlb_cma_balance(int nid); #else static inline void hugetlb_cma_free_folio(struct folio *folio) { @@ -53,5 +54,9 @@ static inline bool hugetlb_early_cma(struct hstate *h) { return false; } + +static inline void hugetlb_cma_balance(int nid) +{ +} #endif #endif --=20 2.51.0.384.g4c02a37b29-goog From nobody Thu Oct 2 14:22:15 2025 Received: from mail-pf1-f201.google.com (mail-pf1-f201.google.com [209.85.210.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3D84E298CC7 for ; Mon, 15 Sep 2025 19:52:36 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965957; cv=none; b=PmoUMKcTzZTS+lUS9pQgRuFzWQTypqjEkIW3MYIDO6uRHVvWNs4ta8tda223mFZI6uoL8fMM9woOcf2hlyD9g0P8yZFhPKkqlq7+ShWHtyPaTC2A/4D+NVLOqA6cIOntaGFbMh1GFygRLYp5NA7hkA5WVsU8WeZnUrTccHGKOOA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965957; c=relaxed/simple; bh=aKMdDFEMo/oVD6fE5n37VpWwZBts1CGWoQryXnCpJOA=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Cc:Content-Type; b=pg+rGECRn5kylFxRP5lCVAsrLAdHTESbNmav7I3fZBtxUMa0FGa0okYE1b4r0m6F2ELK+FlHwrGBRHN7/8wFqO4aza7OdwJAQNlQS6obWxC9vJr0p52U2Idp++1Qc1p4HPv3IpN8OX5QLQ7n3KDVnGD3PlnF5j8YLctzTIYGwf4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=MXMXHrIP; arc=none smtp.client-ip=209.85.210.201 
Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="MXMXHrIP" Received: by mail-pf1-f201.google.com with SMTP id d2e1a72fcca58-77422201fd8so3846143b3a.1 for ; Mon, 15 Sep 2025 12:52:36 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1757965956; x=1758570756; darn=vger.kernel.org; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=W+/YCFUq0Yen9BegMvuM42I97kIDxgCZBi/LyO4DIQY=; b=MXMXHrIP7R7cskyC29tITw7ljpYYDVtJiJS8oMP9zXIRi3uVwVeLsmfpFI3DFYxtNL efuEbSUoP1jMOeEDATMjSoH+nrkr09s4VMnMEwQlDO2x3KX6cfjpHVxRVBYeMtsTN5xF VTvXdd8w8E+ygftuzu3PyKwGmKiXe61h0HJMKYlvfyOt3Nhprefjf/CRPMEVN3msZlPj q7QGdd+ElFRWc0mRJ3btRCI0AxhGll57FZEEcniofwAHq92WyjSoOcCdifn9bfwXDI5n oG6G6e0gl+jxtadaWE/CBsHNW3scmDF1ADpzCnUUSXDPPDxBolX/YPhNILnOqt6/WbiF 52ew== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1757965956; x=1758570756; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=W+/YCFUq0Yen9BegMvuM42I97kIDxgCZBi/LyO4DIQY=; b=JXg5IbvoYN1hxFU6bRWk+oOj/pINXwrLwLpUiVGjXY+Gh08wZ+6GSLb43NQ2ZYVRZR 1frLzFf+gxKxqSUUpBP+YYNOhWEh8WB23SsFIWlXvOYKeowK5nP6jP6qIU40D2RwdvTF 0zlHqkk/2z9niHXKS95Y0MrKUu0Qc/xOcD8vUEIUCNF/ryHMplQnZuxjWnbWWJ1TpuBo SRobj/QejlMyLDkZxNpd/Cmoj/gf5DI3n7OJiyLrnFr5Z9YMDxMOTYRq5reRGNjKZAjj HTGjAG+4TGJZqRnQpicNOZY00ymW0Or50mbjbT5+QKINef0LL3272uStGTybA1qNdz39 9t2A== X-Forwarded-Encrypted: i=1; AJvYcCUDrntvNWCj0jOwLsnhx/Z2xGN8aG/wWFg4Quts1MoSINnrkVAcrzlIwjkGqxzL6H4E6kxhk2iwjCanZW8=@vger.kernel.org X-Gm-Message-State: AOJu0Yx/wLhWk0kSU2o5G3uubqTb+as0A1D1ICb7Ydp6opVlP4UgzrQP yaFqeD96I17oL2PVtR+f5BOB/XDYEEzUn4EvSYYIezK8Qf7th0kRQ63O4S9y8VKnyh1/uSBiMg= = X-Google-Smtp-Source: AGHT+IELU+Ic0v9pK24z6KtPtYVZqHgqvjb+y/T+z6SpzU6YQC5S9+eHZfr6wIXl6+CEbQ7nMg5OLJCk X-Received: from pjab9.prod.google.com ([2002:a17:90a:1009:b0:32e:d02:d541]) (user=fvdl job=prod-delivery.src-stubby-dispatcher) by 2002:a05:6a21:6da9:b0:262:6310:3fb9 with SMTP id adf61e73a8af0-26263104373mr10578957637.34.1757965955716; Mon, 15 Sep 2025 12:52:35 -0700 (PDT) Date: Mon, 15 Sep 2025 19:51:52 +0000 In-Reply-To: <20250915195153.462039-1-fvdl@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250915195153.462039-1-fvdl@google.com> X-Mailer: git-send-email 2.51.0.384.g4c02a37b29-goog Message-ID: <20250915195153.462039-12-fvdl@google.com> Subject: [RFC PATCH 11/12] mm/cma: rebalance CMA when changing cma_first_limit From: Frank van der Linden To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org, linux-kernel@vger.kernel.org Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev, Frank van der Linden Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" To keep things consistent, rebalance CMA when changing the cma_first_limit sysctl. 
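For example (assuming the knob is exposed with the rest of page_alloc_sysctl_table under /proc/sys/vm/): after this change, adjusting the limit at runtime with something like `sysctl -w vm.cma_first_limit=20` no longer only affects future allocation placement decisions; it also immediately walks every node with memory and runs balance_node_cma(nid, NULL), so the amount of free CMA memory in each zone is brought in line with the new percentage right away.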
Signed-off-by: Frank van der Linden Reviewed-by: Rik van Riel --- mm/page_alloc.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dc59aaa63ae6..da1cab63995c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6640,6 +6640,24 @@ static int percpu_pagelist_high_fraction_sysctl_hand= ler(const struct ctl_table * return ret; } =20 +#ifdef CONFIG_CMA +static int cma_first_limit_sysctl_handler(const struct ctl_table *table, + int write, void *buffer, size_t *length, + loff_t *ppos) +{ + int ret, nid; + + ret =3D proc_dointvec_minmax(table, write, buffer, length, ppos); + if (ret || !write) + return ret; + + for_each_node_state(nid, N_MEMORY) + balance_node_cma(nid, NULL); + + return 0; +} +#endif + static const struct ctl_table page_alloc_sysctl_table[] =3D { { .procname =3D "min_free_kbytes", @@ -6723,7 +6741,7 @@ static const struct ctl_table page_alloc_sysctl_table= [] =3D { .data =3D &cma_first_limit, .maxlen =3D sizeof(cma_first_limit), .mode =3D 0644, - .proc_handler =3D proc_dointvec_minmax, + .proc_handler =3D cma_first_limit_sysctl_handler, .extra1 =3D SYSCTL_ZERO, .extra2 =3D SYSCTL_ONE_HUNDRED, }, --=20 2.51.0.384.g4c02a37b29-goog From nobody Thu Oct 2 14:22:15 2025 Received: from mail-pj1-f73.google.com (mail-pj1-f73.google.com [209.85.216.73]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2A1C029A300 for ; Mon, 15 Sep 2025 19:52:37 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.216.73 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965959; cv=none; b=A7V7OQNaTqpjx0aobFiMtr7N7E0EipTshdsUrh5QUWxO//SSPYVKGfpUMvwVMi/k5TS0K5LcM9IrDDE4dCsI+R0OXrwRRkIRS1AmIeEdpZFlfTNnnc35lXx6ipqrv2Cl9bRYmfuFDhiNmWXLFj68MB2Yha7rChkGOfeNElh9Dfk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1757965959; c=relaxed/simple; bh=E823T6HixBrikb26EDcwlUd1cuW3LPFnDlttoF3Syzw=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Cc:Content-Type; b=qxqnbbohnhrw84YnkrK+LINKA1qFIAvayd/Jx3ffuovrIHZGglInyN8cNJZCEXsKgJncNPJLHL+CunC+hLHUOXRJuyhXbeKhZNXgQFO7WzYBI+Ib7T09eIogs+dlauFuZ21Znvew9vMIBGFhmQmBqg+TVo4o+UpFpd4ns3/CrMM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=1Ek9skAL; arc=none smtp.client-ip=209.85.216.73 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--fvdl.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="1Ek9skAL" Received: by mail-pj1-f73.google.com with SMTP id 98e67ed59e1d1-324e41e946eso8237487a91.0 for ; Mon, 15 Sep 2025 12:52:37 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1757965957; x=1758570757; darn=vger.kernel.org; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=II5IQOV7eE0UCj316osBsFAD6qW3Vm1Z7tYAX1f0u8c=; b=1Ek9skALuvF30rujg7D7S4FZLGjsGFjZTMIW5ptcudn+AugCCRQ9aqCpEmx7fINLY0 VTiuDUDgVP7LJ+YyEAqS/JtJwLI2l/Gg8wDlEj2SMKPU6nBUTSIrZwcDU2BHJHbWUY05 
Date: Mon, 15 Sep 2025 19:51:53 +0000
From: Frank van der Linden
To: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org,
 linux-kernel@vger.kernel.org
Cc: hannes@cmpxchg.org, david@redhat.com, roman.gushchin@linux.dev,
 Frank van der Linden
Subject: [RFC PATCH 12/12] mm/cma: add CMA balance VM event counter
Message-ID: <20250915195153.462039-13-fvdl@google.com>
In-Reply-To: <20250915195153.462039-1-fvdl@google.com>
References: <20250915195153.462039-1-fvdl@google.com>

Add VM event counters that record the number of pages migrated
successfully and unsuccessfully during CMA rebalancing, similar to the
other migration counters.
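Once applied (with CONFIG_CMA enabled), the two counters appear in
/proc/vmstat alongside the existing cma_alloc_* events; a minimal sketch
for reading them back:

/* Read the new counters from /proc/vmstat.  Only the counter names come
 * from this patch; the rest is ordinary /proc parsing.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[256];
        FILE *f = fopen("/proc/vmstat", "r");

        if (!f) {
                perror("/proc/vmstat");
                return 1;
        }
        while (fgets(line, sizeof(line), f)) {
                /* Matches cma_balance_migrate_success and _fail. */
                if (!strncmp(line, "cma_balance_migrate_", 20))
                        fputs(line, stdout);
        }
        fclose(f);
        return 0;
}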
Signed-off-by: Frank van der Linden
Reviewed-by: Rik van Riel
---
 include/linux/vm_event_item.h | 3 +++
 mm/migrate.c                  | 8 ++++++++
 mm/vmstat.c                   | 2 ++
 3 files changed, 13 insertions(+)

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 9e15a088ba38..1711ff85a02f 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -87,6 +87,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                 CMA_ALLOC_SUCCESS, CMA_ALLOC_FAIL,
 #endif
+                CMA_BALANCE_MIGRATE_SUCCESS,
+                CMA_BALANCE_MIGRATE_FAIL,
+
                 UNEVICTABLE_PGCULLED,   /* culled to noreclaim list */
                 UNEVICTABLE_PGSCANNED,  /* scanned for reclaimability */
                 UNEVICTABLE_PGRESCUED,  /* rescued from noreclaim list */
diff --git a/mm/migrate.c b/mm/migrate.c
index 9e5ef39ce73a..63d771daa3bc 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2144,6 +2144,14 @@ int migrate_pages(struct list_head *from, new_folio_t get_new_folio,
         count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded);
         count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed);
         count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split);
+
+        if (reason == MR_CMA_BALANCE) {
+                count_vm_events(CMA_BALANCE_MIGRATE_SUCCESS,
+                                stats.nr_succeeded);
+                count_vm_events(CMA_BALANCE_MIGRATE_FAIL,
+                                stats.nr_failed_pages);
+        }
+
         trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages,
                                stats.nr_thp_succeeded, stats.nr_thp_failed,
                                stats.nr_thp_split, stats.nr_split, mode,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 71cd1ceba191..af811328db09 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1392,6 +1392,8 @@ const char * const vmstat_text[] = {
 #ifdef CONFIG_CMA
         [I(CMA_ALLOC_SUCCESS)] = "cma_alloc_success",
         [I(CMA_ALLOC_FAIL)] = "cma_alloc_fail",
+        [I(CMA_BALANCE_MIGRATE_SUCCESS)] = "cma_balance_migrate_success",
+        [I(CMA_BALANCE_MIGRATE_FAIL)] = "cma_balance_migrate_fail",
 #endif
         [I(UNEVICTABLE_PGCULLED)] = "unevictable_pgs_culled",
         [I(UNEVICTABLE_PGSCANNED)] = "unevictable_pgs_scanned",
-- 
2.51.0.384.g4c02a37b29-goog

From nobody Thu Oct 2 14:22:15 2025
Date: Thu, 25 Sep 2025 18:11:06 -0400
From: Rik van Riel
To: Frank van der Linden
Cc: akpm@linux-foundation.org, muchun.song@linux.dev, linux-mm@kvack.org,
 linux-kernel@vger.kernel.org, hannes@cmpxchg.org, david@redhat.com,
 roman.gushchin@linux.dev, kernel-team@meta.com
Subject: [RFC PATCH 13/12] mm,cma: add compaction cma balance helper for direct reclaim
Message-ID: <20250925181106.3924a90c@fangorn>
In-Reply-To: <20250915195153.462039-1-fvdl@google.com>
References: <20250915195153.462039-1-fvdl@google.com>

On Mon, 15 Sep 2025 19:51:41 +0000 Frank van der Linden wrote:

> This is an RFC on a solution to the long-standing problem of OOMs
> occurring when the kernel runs out of space for unmovable allocations
> in the face of large amounts of CMA.

To make the CMA balancing code useful without hugetlb involvement,
e.g. for a plain !__GFP_MOVABLE allocation, I added two patches that
invoke CMA balancing from the page reclaim code when needed.

With these changes, we may no longer need to call the CMA balancing
code from the hugetlb free path at all, which could simplify things
in that area.

---8<---
From 99991606760fdf8399255d7fc1f21b58069a4afe Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Tue, 23 Sep 2025 10:01:42 -0700
Subject: [PATCH 2/3] mm,cma: add compaction cma balance helper for direct reclaim

Add a CMA balance helper for the direct reclaim code, which does not
balance CMA free memory all the way, but only moves a limited number
of pages.
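As a rough sketch of the intended split (a hypothetical caller, for
illustration only; the actual reclaim-side call site is added separately
in this series): a target of 0 lets balance_zone_cma() compute how much
to migrate, while a non-zero target bounds the work done on behalf of a
direct reclaimer.

/* Hypothetical caller, not part of this patch. */
static void example_cma_balance(int nid, struct zonelist *zonelist,
                                int nr_reclaimed, bool direct)
{
        if (direct)
                /* Bounded: move at most nr_reclaimed pages into CMA. */
                balance_cma_zonelist(zonelist, nr_reclaimed);
        else
                /* Unbounded: balance all zones on the node. */
                balance_node_cma(nid, NULL);
}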
Signed-off-by: Rik van Riel
---
 mm/compaction.c | 20 ++++++++++++++++++--
 mm/internal.h   |  7 +++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 3200119b8baf..90478c29db60 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2541,7 +2541,7 @@ isolate_free_cma_pages(struct compact_control *cc)
         cc->free_pfn = next_pfn;
 }
 
-static void balance_zone_cma(struct zone *zone, struct cma *cma)
+static void balance_zone_cma(struct zone *zone, struct cma *cma, int target)
 {
         struct compact_control cc = {
                 .zone = zone,
@@ -2613,6 +2613,13 @@ static void balance_zone_cma(struct zone *zone, struct cma *cma)
         nr_pages = min(nr_pages, cma_get_available(cma));
         nr_pages = min(allocated_noncma, nr_pages);
 
+        /*
+         * When invoked from page reclaim, use the provided target rather
+         * than the calculated one.
+         */
+        if (target)
+                nr_pages = target;
+
         for (order = 0; order < NR_PAGE_ORDERS; order++)
                 INIT_LIST_HEAD(&cc.freepages[order]);
         INIT_LIST_HEAD(&cc.migratepages);
@@ -2674,10 +2681,19 @@ void balance_node_cma(int nid, struct cma *cma)
                 if (!populated_zone(zone))
                         continue;
 
-                balance_zone_cma(zone, cma);
+                balance_zone_cma(zone, cma, 0);
         }
 }
 
+void balance_cma_zonelist(struct zonelist *zonelist, int nr_pages)
+{
+        struct zoneref *z;
+        struct zone *zone;
+
+        for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1)
+                balance_zone_cma(zone, NULL, nr_pages);
+}
+
 #endif /* CONFIG_CMA */
 
 static enum compact_result
diff --git a/mm/internal.h b/mm/internal.h
index 7dcaf7214683..5340b94683bf 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -942,6 +942,7 @@ struct cma;
 void *cma_reserve_early(struct cma *cma, unsigned long size);
 void init_cma_pageblock(struct page *page);
 void balance_node_cma(int nid, struct cma *cma);
+void balance_cma_zonelist(struct zonelist *zonelist, int nr_pages);
 #else
 static inline void *cma_reserve_early(struct cma *cma, unsigned long size)
 {
@@ -950,6 +951,12 @@ static inline void *cma_reserve_early(struct cma *cma, unsigned long size)
 static inline void init_cma_pageblock(struct page *page)
 {
 }
+static inline void balance_node_cma(int nid, struct cma *cma)
+{
+}
+static inline void balance_cma_zonelist(struct zonelist *zonelist, int nr_pages)
+{
+}
 #endif
 
 
-- 
2.47.3