We can clean up the code by calculating the #refs earlier,
so we can just inline what remains of record_subpages().
Calculate the number of references/pages ahead of time, and record them
only once all our tests have passed.
Signed-off-by: David Hildenbrand <david@redhat.com>
---
mm/gup.c | 25 ++++++++-----------------
1 file changed, 8 insertions(+), 17 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index c10cd969c1a3b..f0f4d1a68e094 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -484,19 +484,6 @@ static inline void mm_set_has_pinned_flag(struct mm_struct *mm)
#ifdef CONFIG_MMU
#ifdef CONFIG_HAVE_GUP_FAST
-static int record_subpages(struct page *page, unsigned long sz,
- unsigned long addr, unsigned long end,
- struct page **pages)
-{
- int nr;
-
- page += (addr & (sz - 1)) >> PAGE_SHIFT;
- for (nr = 0; addr != end; nr++, addr += PAGE_SIZE)
- pages[nr] = page++;
-
- return nr;
-}
-
/**
* try_grab_folio_fast() - Attempt to get or pin a folio in fast path.
* @page: pointer to page to be grabbed
@@ -2967,8 +2954,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
if (pmd_special(orig))
return 0;
- page = pmd_page(orig);
- refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr);
+ refs = (end - addr) >> PAGE_SHIFT;
+ page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
@@ -2989,6 +2976,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
}
*nr += refs;
+ for (; refs; refs--)
+ *(pages++) = page++;
folio_set_referenced(folio);
return 1;
}
@@ -3007,8 +2996,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
if (pud_special(orig))
return 0;
- page = pud_page(orig);
- refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);
+ refs = (end - addr) >> PAGE_SHIFT;
+ page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
folio = try_grab_folio_fast(page, refs, flags);
if (!folio)
@@ -3030,6 +3019,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
}
*nr += refs;
+ for (; refs; refs--)
+ *(pages++) = page++;
folio_set_referenced(folio);
return 1;
}
--
2.50.1
On Mon, Sep 01, 2025 at 05:03:40PM +0200, David Hildenbrand wrote: > We can just cleanup the code by calculating the #refs earlier, > so we can just inline what remains of record_subpages(). > > Calculate the number of references/pages ahead of times, and record them > only once all our tests passed. I'm seeing failures in kselftest-mm in -next on at least Raspberry Pi 4 and Orion O6 which bisect to this patch. I'm seeing a NULL pointer dereference during the GUP test (which isn't actually doing anything as I'm just using a standard defconfig rather than one with the mm fragment): # # # [RUN] R/O longterm GUP-fast pin in MAP_SHARED file mapping .[ 92.209804] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 ... [ 92.443816] Call trace: [ 92.446284] io_check_coalesce_buffer+0xd4/0x160 (P) [ 92.451306] io_sqe_buffers_register+0x1b0/0x22c [ 92.455976] __arm64_sys_io_uring_register+0x330/0xe74 [ 92.461176] invoke_syscall+0x48/0x104 [ 92.464966] el0_svc_common.constprop.0+0x40/0xe0 Full log: https://lava.sirena.org.uk/scheduler/job/1778528#L1985 The bisect looks to converge reasonably clearly, I didn't make much more effort to diagnose: # bad: [be5d4872e528796df9d7425f2bd9b3893eb3a42c] Add linux-next specific files for 20250905 # good: [5fe42852269dc659c8d511864410bd5cf3393e91] Merge branch 'for-linux-next-fixes' of https://gitlab.freedesktop.org/drm/misc/kernel.git # good: [0ccc1eeda155c947d88ef053e0b54e434e218ee2] ASoC: dt-bindings: wlf,wm8960: Document routing strings (pin names) # good: [7748328c2fd82efed24257b2bfd796eb1fa1d09b] ASoC: dt-bindings: qcom,lpass-va-macro: Update bindings for clocks to support ADSP # good: [dd7ae5b8b3c291c0206f127a564ae1e316705ca0] ASoC: cs42l43: Shutdown jack detection on suspend # good: [ce1a46b2d6a8465a86f7a6f71beb4c6de83bce5c] ASoC: codecs: lpass-wsa-macro: add Codev version 2.9 # good: [ce57b718006a069226b5e5d3afe7969acd59154e] ASoC: Intel: avs: ssm4567: Adjust platform name # good: 
[94b39cb3ad6db935b585988b36378884199cd5fc] spi: mxs: fix "transfered"->"transferred" # good: [5cc49b5a36b32a2dba41441ea13b93fb5ea21cfd] spi: spi-fsl-dspi: Report FIFO overflows as errors # good: [3279052eab235bfb7130b1fabc74029c2260ed8d] ASoC: SOF: ipc4-topology: Fix a less than zero check on a u32 # good: [8f57dcf39fd0864f5f3e6701fe885e55f45d0d3a] ASoC: qcom: audioreach: convert to cpu endainess type before accessing # good: [9d35d068fb138160709e04e3ee97fe29a6f8615b] regulator: scmi: Use int type to store negative error codes # good: [8a9772ec08f87c9e45ab1ad2c8d2b8c1763836eb] ASoC: soc-dapm: rename snd_soc_kcontrol_component() to snd_soc_kcontrol_to_component() # good: [07752abfa5dbf7cb4d9ce69fa94dc3b12bc597d9] ASoC: SOF: sof-client: Introduce sof_client_dev_entry structure # good: [d57d27171c92e9049d5301785fb38de127b28fbf] ASoC: SOF: sof-client-probes: Add available points_info(), IPC4 only # good: [f7c41911ad744177d8289820f01009dc93d8f91c] ASoC: SOF: ipc4-topology: Add support for float sample type # good: [3d439e1ec3368fae17db379354bd7a9e568ca0ab] ASoC: sof: ipc4-topology: Add support to sched_domain attribute # good: [5c39bc498f5ff7ef016abf3f16698f3e8db79677] ASoC: SOF: Intel: only detect codecs when HDA DSP probe # good: [f522da9ab56c96db8703b2ea0f09be7cdc3bffeb] ASoC: doc: Internally link to Writing an ALSA Driver docs # good: [f4672dc6e9c07643c8c755856ba8e9eb9ca95d0c] regmap: use int type to store negative error codes # good: [b088b6189a4066b97cef459afd312fd168a76dea] ASoC: mediatek: common: Switch to for_each_available_child_of_node_scoped() # good: [c42e36a488c7e01f833fc9f4814f735b66b2d494] spi: Drop dev_pm_domain_detach() call # good: [a37280daa4d583c7212681c49b285de9464a5200] ASoC: Intel: avs: Allow i2s test and non-test boards to coexist # good: [ff9a7857b7848227788f113d6dc6a72e989084e0] spi: rb4xx: use devm for clk_prepare_enable # good: [edb5c1f885207d1d74e8a1528e6937e02829ee6e] ASoC: renesas: msiof: start DMAC first # good: 
[e2ab5f600bb01d3625d667d97b3eb7538e388336] rust: regulator: use `to_result` for error handling # good: [5b4dcaf851df8c414bfc2ac3bf9c65fc942f3be4] ASoC: amd: acp: Remove (explicitly) unused header # good: [899fb38dd76dd3ede425bbaf8a96d390180a5d1c] regulator: core: Remove redundant ternary operators # good: [11f5c5f9e43e9020bae452232983fe98e7abfce0] ASoC: qcom: use int type to store negative error codes # good: [a12b74d2bd4724ee1883bc97ec93eac8fafc8d3c] ASoC: tlv320aic32x4: use dev_err_probe() for regulators # good: [f840737d1746398c2993be34bfdc80bdc19ecae2] ASoC: SOF: imx: Remove the use of dev_err_probe() # good: [d78e48ebe04e9566f8ecbf51471e80da3adbceeb] ASoC: dt-bindings: Minor whitespace cleanup in example # good: [96bcb34df55f7fee99795127c796315950c94fed] ASoC: test-component: Use kcalloc() instead of kzalloc() # good: [c232495d28ca092d0c39b10e35d3d613bd2414ab] ASoC: dt-bindings: omap-twl4030: convert to DT schema # good: [87a877de367d835b527d1086f75727123ef85fc4] KVM: x86: Rename handle_fastpath_set_msr_irqoff() to handle_fastpath_wrmsr() # good: [c26675447faff8c4ddc1dc5d2cd28326b8181aaf] KVM: x86: Zero XSTATE components on INIT by iterating over supported features # good: [ec0be3cdf40b5302248f3fb27a911cc630e8b855] regulator: consumer.rst: document bulk operations # good: [27848c082ba0b22850fd9fb7b185c015423dcdc7] spi: s3c64xx: Remove the use of dev_err_probe() # good: [c1dd310f1d76b4b13f1854618087af2513140897] spi: SPISG: Use devm_kcalloc() in aml_spisg_clk_init() # good: [da9881d00153cc6d3917f6b74144b1d41b58338c] ASoC: qcom: audioreach: add support for SMECNS module # good: [cf65182247761f7993737b710afe8c781699356b] ASoC: codecs: wsa883x: Handle shared reset GPIO for WSA883x speakers # good: [2a55135201d5e24b80b7624880ff42eafd8e320c] ASoC: Intel: avs: Streamline register-component function names # good: [550bc517e59347b3b1af7d290eac4fb1411a3d4e] regulator: bd718x7: Use kcalloc() instead of kzalloc() # good: [0056b410355713556d8a10306f82e55b28d33ba8] spi: 
offload trigger: adi-util-sigma-delta: clean up imports # good: [daf855f76a1210ceed9541f71ac5dd9be02018a6] ASoC: es8323: enable DAPM power widgets for playback DAC # good: [90179609efa421b1ccc7d8eafbc078bafb25777c] spi: spl022: use min_t() to improve code # good: [258384d8ce365dddd6c5c15204de8ccd53a7ab0a] ASoC: es8323: enable DAPM power widgets for playback DAC and output # good: [6d068f1ae2a2f713d7f21a9a602e65b3d6b6fc6d] regulator: rt5133: Fix spelling mistake "regualtor" -> "regulator" # good: [a46e95c81e3a28926ab1904d9f754fef8318074d] ASoC: wl1273: Remove # good: [48124569bbc6bfda1df3e9ee17b19d559f4b1aa3] spi: remove unneeded 'fast_io' parameter in regmap_config # good: [37533933bfe92cd5a99ef4743f31dac62ccc8de0] regulator: remove unneeded 'fast_io' parameter in regmap_config # good: [0e62438e476494a1891a8822b9785bc6e73e9c3f] ASoC: Intel: sst: Remove redundant semicolons # good: [5c36b86d2bf68fbcad16169983ef7ee8c537db59] regmap: Remove superfluous check for !config in __regmap_init() # good: [714165e1c4b0d5b8c6d095fe07f65e6e7047aaeb] regulator: rt5133: Add RT5133 PMIC regulator Support # good: [9c45f95222beecd6a284fd1284d54dd7a772cf59] spi: spi-qpic-snand: handle 'use_ecc' parameter of qcom_spi_config_cw_read() # good: [bab4ab484a6ca170847da9bffe86f1fa90df4bbe] ASoC: dt-bindings: Convert brcm,bcm2835-i2s to DT schema # good: [b832b19318534bb4f1673b24d78037fee339c679] spi: loopback-test: Don't use %pK through printk # good: [8c02c8353460f8630313aef6810f34e134a3c1ee] ASoC: dt-bindings: realtek,alc5623: convert to DT schema # good: [6b7e2aa50bdaf88cd4c2a5e2059a7bf32d85a8b1] spi: spi-qpic-snand: remove 'clr*status' members of struct 'qpic_ecc' # good: [2291a2186305faaf8525d57849d8ba12ad63f5e7] MAINTAINERS: Add entry for FourSemi audio amplifiers # good: [a54ef14188519a0994d0264f701f5771815fa11e] regulator: dt-bindings: Clean-up active-semi,act8945a duplication # good: [a1d0b0ae65ae3f32597edfbb547f16c75601cd87] spi: spi-qpic-snand: avoid double assignment in 
qcom_spi_probe() # good: [cf25eb8eae91bcae9b2065d84b0c0ba0f6d9dd34] ASoC: soc-component: unpack snd_soc_component_init_bias_level() # good: [595b7f155b926460a00776cc581e4dcd01220006] ASoC: Intel: avs: Conditional-path support # good: [3059067fd3378a5454e7928c08d20bf3ef186760] ASoC: cs48l32: Use PTR_ERR_OR_ZERO() to simplify code # good: [2d86d2585ab929a143d1e6f8963da1499e33bf13] ASoC: pxa: add GPIOLIB_LEGACY dependency # good: [9a200cbdb54349909a42b45379e792e4b39dd223] rust: regulator: implement Send and Sync for Regulator<T> # good: [162e23657e5379f07c6404dbfbf4367cb438ea7d] regulator: pf0900: Add PMIC PF0900 support # good: [886f42ce96e7ce80545704e7168a9c6b60cd6c03] regmap: mmio: Add missing MODULE_DESCRIPTION() # good: [6684aba0780da9f505c202f27e68ee6d18c0aa66] XArray: Add extra debugging check to xas_lock and friends git bisect start 'be5d4872e528796df9d7425f2bd9b3893eb3a42c' '5fe42852269dc659c8d511864410bd5cf3393e91' '0ccc1eeda155c947d88ef053e0b54e434e218ee2' '7748328c2fd82efed24257b2bfd796eb1fa1d09b' 'dd7ae5b8b3c291c0206f127a564ae1e316705ca0' 'ce1a46b2d6a8465a86f7a6f71beb4c6de83bce5c' 'ce57b718006a069226b5e5d3afe7969acd59154e' '94b39cb3ad6db935b585988b36378884199cd5fc' '5cc49b5a36b32a2dba41441ea13b93fb5ea21cfd' '3279052eab235bfb7130b1fabc74029c2260ed8d' '8f57dcf39fd0864f5f3e6701fe885e55f45d0d3a' '9d35d068fb138160709e04e3ee97fe29a6f8615b' '8a9772ec08f87c9e45ab1ad2c8d2b8c1763836eb' '07752abfa5dbf7cb4d9ce69fa94dc3b12bc597d9' 'd57d27171c92e9049d5301785fb38de127b28fbf' 'f7c41911ad744177d8289820f01009dc93d8f91c' '3d439e1ec3368fae17db379354bd7a9e568ca0ab' '5c39bc498f5ff7ef016abf3f16698f3e8db79677' 'f522da9ab56c96db8703b2ea0f09be7cdc3bffeb' 'f4672dc6e9c07643c8c755856ba8e9eb9ca95d0c' 'b088b6189a4066b97cef459afd312fd168a76dea' 'c42e36a488c7e01f833fc9f4814f735b66b2d494' 'a37280daa4d583c7212681c49b285de9464a5200' 'ff9a7857b7848227788f113d6dc6a72e989084e0' 'edb5c1f885207d1d74e8a1528e6937e02829ee6e' 'e2ab5f600bb01d3625d667d97b3eb7538e388336' 
'5b4dcaf851df8c414bfc2ac3bf9c65fc942f3be4' '899fb38dd76dd3ede425bbaf8a96d390180a5d1c' '11f5c5f9e43e9020bae452232983fe98e7abfce0' 'a12b74d2bd4724ee1883bc97ec93eac8fafc8d3c' 'f840737d1746398c2993be34bfdc80bdc19ecae2' 'd78e48ebe04e9566f8ecbf51471e80da3adbceeb' '96bcb34df55f7fee99795127c796315950c94fed' 'c232495d28ca092d0c39b10e35d3d613bd2414ab' '87a877de367d835b527d1086f75727123ef85fc4' 'c26675447faff8c4ddc1dc5d2cd28326b8181aaf' 'ec0be3cdf40b5302248f3fb27a911cc630e8b855' '27848c082ba0b22850fd9fb7b185c015423dcdc7' 'c1dd310f1d76b4b13f1854618087af2513140897' 'da9881d00153cc6d3917f6b74144b1d41b58338c' 'cf65182247761f7993737b710afe8c781699356b' '2a55135201d5e24b80b7624880ff42eafd8e320c' '550bc517e59347b3b1af7d290eac4fb1411a3d4e' '0056b410355713556d8a10306f82e55b28d33ba8' 'daf855f76a1210ceed9541f71ac5dd9be02018a6' '90179609efa421b1ccc7d8eafbc078bafb25777c' '258384d8ce365dddd6c5c15204de8ccd53a7ab0a' '6d068f1ae2a2f713d7f21a9a602e65b3d6b6fc6d' 'a46e95c81e3a28926ab1904d9f754fef8318074d' '48124569bbc6bfda1df3e9ee17b19d559f4b1aa3' '37533933bfe92cd5a99ef4743f31dac62ccc8de0' '0e62438e476494a1891a8822b9785bc6e73e9c3f' '5c36b86d2bf68fbcad16169983ef7ee8c537db59' '714165e1c4b0d5b8c6d095fe07f65e6e7047aaeb' '9c45f95222beecd6a284fd1284d54dd7a772cf59' 'bab4ab484a6ca170847da9bffe86f1fa90df4bbe' 'b832b19318534bb4f1673b24d78037fee339c679' '8c02c8353460f8630313aef6810f34e134a3c1ee' '6b7e2aa50bdaf88cd4c2a5e2059a7bf32d85a8b1' '2291a2186305faaf8525d57849d8ba12ad63f5e7' 'a54ef14188519a0994d0264f701f5771815fa11e' 'a1d0b0ae65ae3f32597edfbb547f16c75601cd87' 'cf25eb8eae91bcae9b2065d84b0c0ba0f6d9dd34' '595b7f155b926460a00776cc581e4dcd01220006' '3059067fd3378a5454e7928c08d20bf3ef186760' '2d86d2585ab929a143d1e6f8963da1499e33bf13' '9a200cbdb54349909a42b45379e792e4b39dd223' '162e23657e5379f07c6404dbfbf4367cb438ea7d' '886f42ce96e7ce80545704e7168a9c6b60cd6c03' '6684aba0780da9f505c202f27e68ee6d18c0aa66' # test job: [0ccc1eeda155c947d88ef053e0b54e434e218ee2] https://lava.sirena.org.uk/scheduler/job/1773040 # 
test job: [7748328c2fd82efed24257b2bfd796eb1fa1d09b] https://lava.sirena.org.uk/scheduler/job/1773378 # test job: [dd7ae5b8b3c291c0206f127a564ae1e316705ca0] https://lava.sirena.org.uk/scheduler/job/1773233 # test job: [ce1a46b2d6a8465a86f7a6f71beb4c6de83bce5c] https://lava.sirena.org.uk/scheduler/job/1768983 # test job: [ce57b718006a069226b5e5d3afe7969acd59154e] https://lava.sirena.org.uk/scheduler/job/1768713 # test job: [94b39cb3ad6db935b585988b36378884199cd5fc] https://lava.sirena.org.uk/scheduler/job/1768603 # test job: [5cc49b5a36b32a2dba41441ea13b93fb5ea21cfd] https://lava.sirena.org.uk/scheduler/job/1769293 # test job: [3279052eab235bfb7130b1fabc74029c2260ed8d] https://lava.sirena.org.uk/scheduler/job/1762427 # test job: [8f57dcf39fd0864f5f3e6701fe885e55f45d0d3a] https://lava.sirena.org.uk/scheduler/job/1760074 # test job: [9d35d068fb138160709e04e3ee97fe29a6f8615b] https://lava.sirena.org.uk/scheduler/job/1758673 # test job: [8a9772ec08f87c9e45ab1ad2c8d2b8c1763836eb] https://lava.sirena.org.uk/scheduler/job/1758556 # test job: [07752abfa5dbf7cb4d9ce69fa94dc3b12bc597d9] https://lava.sirena.org.uk/scheduler/job/1752251 # test job: [d57d27171c92e9049d5301785fb38de127b28fbf] https://lava.sirena.org.uk/scheduler/job/1752624 # test job: [f7c41911ad744177d8289820f01009dc93d8f91c] https://lava.sirena.org.uk/scheduler/job/1752345 # test job: [3d439e1ec3368fae17db379354bd7a9e568ca0ab] https://lava.sirena.org.uk/scheduler/job/1753454 # test job: [5c39bc498f5ff7ef016abf3f16698f3e8db79677] https://lava.sirena.org.uk/scheduler/job/1751954 # test job: [f522da9ab56c96db8703b2ea0f09be7cdc3bffeb] https://lava.sirena.org.uk/scheduler/job/1751875 # test job: [f4672dc6e9c07643c8c755856ba8e9eb9ca95d0c] https://lava.sirena.org.uk/scheduler/job/1747876 # test job: [b088b6189a4066b97cef459afd312fd168a76dea] https://lava.sirena.org.uk/scheduler/job/1746202 # test job: [c42e36a488c7e01f833fc9f4814f735b66b2d494] https://lava.sirena.org.uk/scheduler/job/1746271 # test job: 
[a37280daa4d583c7212681c49b285de9464a5200] https://lava.sirena.org.uk/scheduler/job/1746918 # test job: [ff9a7857b7848227788f113d6dc6a72e989084e0] https://lava.sirena.org.uk/scheduler/job/1746336 # test job: [edb5c1f885207d1d74e8a1528e6937e02829ee6e] https://lava.sirena.org.uk/scheduler/job/1746134 # test job: [e2ab5f600bb01d3625d667d97b3eb7538e388336] https://lava.sirena.org.uk/scheduler/job/1746607 # test job: [5b4dcaf851df8c414bfc2ac3bf9c65fc942f3be4] https://lava.sirena.org.uk/scheduler/job/1747672 # test job: [899fb38dd76dd3ede425bbaf8a96d390180a5d1c] https://lava.sirena.org.uk/scheduler/job/1747375 # test job: [11f5c5f9e43e9020bae452232983fe98e7abfce0] https://lava.sirena.org.uk/scheduler/job/1747503 # test job: [a12b74d2bd4724ee1883bc97ec93eac8fafc8d3c] https://lava.sirena.org.uk/scheduler/job/1734077 # test job: [f840737d1746398c2993be34bfdc80bdc19ecae2] https://lava.sirena.org.uk/scheduler/job/1727318 # test job: [d78e48ebe04e9566f8ecbf51471e80da3adbceeb] https://lava.sirena.org.uk/scheduler/job/1706175 # test job: [96bcb34df55f7fee99795127c796315950c94fed] https://lava.sirena.org.uk/scheduler/job/1699577 # test job: [c232495d28ca092d0c39b10e35d3d613bd2414ab] https://lava.sirena.org.uk/scheduler/job/1699507 # test job: [87a877de367d835b527d1086f75727123ef85fc4] https://lava.sirena.org.uk/scheduler/job/1697972 # test job: [c26675447faff8c4ddc1dc5d2cd28326b8181aaf] https://lava.sirena.org.uk/scheduler/job/1698132 # test job: [ec0be3cdf40b5302248f3fb27a911cc630e8b855] https://lava.sirena.org.uk/scheduler/job/1694308 # test job: [27848c082ba0b22850fd9fb7b185c015423dcdc7] https://lava.sirena.org.uk/scheduler/job/1693100 # test job: [c1dd310f1d76b4b13f1854618087af2513140897] https://lava.sirena.org.uk/scheduler/job/1693035 # test job: [da9881d00153cc6d3917f6b74144b1d41b58338c] https://lava.sirena.org.uk/scheduler/job/1693416 # test job: [cf65182247761f7993737b710afe8c781699356b] https://lava.sirena.org.uk/scheduler/job/1687562 # test job: 
[2a55135201d5e24b80b7624880ff42eafd8e320c] https://lava.sirena.org.uk/scheduler/job/1685772 # test job: [550bc517e59347b3b1af7d290eac4fb1411a3d4e] https://lava.sirena.org.uk/scheduler/job/1685910 # test job: [0056b410355713556d8a10306f82e55b28d33ba8] https://lava.sirena.org.uk/scheduler/job/1685649 # test job: [daf855f76a1210ceed9541f71ac5dd9be02018a6] https://lava.sirena.org.uk/scheduler/job/1685441 # test job: [90179609efa421b1ccc7d8eafbc078bafb25777c] https://lava.sirena.org.uk/scheduler/job/1686081 # test job: [258384d8ce365dddd6c5c15204de8ccd53a7ab0a] https://lava.sirena.org.uk/scheduler/job/1673411 # test job: [6d068f1ae2a2f713d7f21a9a602e65b3d6b6fc6d] https://lava.sirena.org.uk/scheduler/job/1673133 # test job: [a46e95c81e3a28926ab1904d9f754fef8318074d] https://lava.sirena.org.uk/scheduler/job/1673748 # test job: [48124569bbc6bfda1df3e9ee17b19d559f4b1aa3] https://lava.sirena.org.uk/scheduler/job/1670184 # test job: [37533933bfe92cd5a99ef4743f31dac62ccc8de0] https://lava.sirena.org.uk/scheduler/job/1668977 # test job: [0e62438e476494a1891a8822b9785bc6e73e9c3f] https://lava.sirena.org.uk/scheduler/job/1669534 # test job: [5c36b86d2bf68fbcad16169983ef7ee8c537db59] https://lava.sirena.org.uk/scheduler/job/1667971 # test job: [714165e1c4b0d5b8c6d095fe07f65e6e7047aaeb] https://lava.sirena.org.uk/scheduler/job/1667699 # test job: [9c45f95222beecd6a284fd1284d54dd7a772cf59] https://lava.sirena.org.uk/scheduler/job/1667598 # test job: [bab4ab484a6ca170847da9bffe86f1fa90df4bbe] https://lava.sirena.org.uk/scheduler/job/1664664 # test job: [b832b19318534bb4f1673b24d78037fee339c679] https://lava.sirena.org.uk/scheduler/job/1659213 # test job: [8c02c8353460f8630313aef6810f34e134a3c1ee] https://lava.sirena.org.uk/scheduler/job/1659264 # test job: [6b7e2aa50bdaf88cd4c2a5e2059a7bf32d85a8b1] https://lava.sirena.org.uk/scheduler/job/1656585 # test job: [2291a2186305faaf8525d57849d8ba12ad63f5e7] https://lava.sirena.org.uk/scheduler/job/1655709 # test job: 
[a54ef14188519a0994d0264f701f5771815fa11e] https://lava.sirena.org.uk/scheduler/job/1656024 # test job: [a1d0b0ae65ae3f32597edfbb547f16c75601cd87] https://lava.sirena.org.uk/scheduler/job/1654201 # test job: [cf25eb8eae91bcae9b2065d84b0c0ba0f6d9dd34] https://lava.sirena.org.uk/scheduler/job/1654790 # test job: [595b7f155b926460a00776cc581e4dcd01220006] https://lava.sirena.org.uk/scheduler/job/1653119 # test job: [3059067fd3378a5454e7928c08d20bf3ef186760] https://lava.sirena.org.uk/scheduler/job/1655440 # test job: [2d86d2585ab929a143d1e6f8963da1499e33bf13] https://lava.sirena.org.uk/scheduler/job/1655917 # test job: [9a200cbdb54349909a42b45379e792e4b39dd223] https://lava.sirena.org.uk/scheduler/job/1654762 # test job: [162e23657e5379f07c6404dbfbf4367cb438ea7d] https://lava.sirena.org.uk/scheduler/job/1652978 # test job: [886f42ce96e7ce80545704e7168a9c6b60cd6c03] https://lava.sirena.org.uk/scheduler/job/1654270 # test job: [6684aba0780da9f505c202f27e68ee6d18c0aa66] https://lava.sirena.org.uk/scheduler/job/1738722 # test job: [be5d4872e528796df9d7425f2bd9b3893eb3a42c] https://lava.sirena.org.uk/scheduler/job/1778528 # bad: [be5d4872e528796df9d7425f2bd9b3893eb3a42c] Add linux-next specific files for 20250905 git bisect bad be5d4872e528796df9d7425f2bd9b3893eb3a42c # test job: [c3ce85ecd0268df1e0ca692e8126bb181fc89a08] https://lava.sirena.org.uk/scheduler/job/1779086 # bad: [c3ce85ecd0268df1e0ca692e8126bb181fc89a08] Merge branch 'main' of https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git git bisect bad c3ce85ecd0268df1e0ca692e8126bb181fc89a08 # test job: [973a887a5bb9a42878e276209592e0f75c287bb6] https://lava.sirena.org.uk/scheduler/job/1780104 # bad: [973a887a5bb9a42878e276209592e0f75c287bb6] Merge branch 'fs-next' of linux-next git bisect bad 973a887a5bb9a42878e276209592e0f75c287bb6 # test job: [fdabd8890022a9439b95d7395f7ae046544d96fd] https://lava.sirena.org.uk/scheduler/job/1780530 # bad: [fdabd8890022a9439b95d7395f7ae046544d96fd] Merge branch 
'for-next' of https://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git git bisect bad fdabd8890022a9439b95d7395f7ae046544d96fd # test job: [94bd0249a4a06131c4a1c2097b6134217a658976] https://lava.sirena.org.uk/scheduler/job/1780904 # bad: [94bd0249a4a06131c4a1c2097b6134217a658976] Merge branch 'for-next' of https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git git bisect bad 94bd0249a4a06131c4a1c2097b6134217a658976 # test job: [702b6c2f1008779e8fc8a4a4438410165309a4b4] https://lava.sirena.org.uk/scheduler/job/1781370 # bad: [702b6c2f1008779e8fc8a4a4438410165309a4b4] kasan-apply-write-only-mode-in-kasan-kunit-testcases-v7 git bisect bad 702b6c2f1008779e8fc8a4a4438410165309a4b4 # test job: [0ac48805721d5952a920356e454167bba8d27737] https://lava.sirena.org.uk/scheduler/job/1781448 # good: [0ac48805721d5952a920356e454167bba8d27737] mm: convert page_to_section() to memdesc_section() git bisect good 0ac48805721d5952a920356e454167bba8d27737 # test job: [dc731eba2e47fa81d50aa1cb167100889253cfe0] https://lava.sirena.org.uk/scheduler/job/1781608 # good: [dc731eba2e47fa81d50aa1cb167100889253cfe0] mm/damon/paddr: support addr_unit for MIGRATE_{HOT,COLD} git bisect good dc731eba2e47fa81d50aa1cb167100889253cfe0 # test job: [e24bb041cafabaa5fa3d76386c86af389cc324f5] https://lava.sirena.org.uk/scheduler/job/1781660 # good: [e24bb041cafabaa5fa3d76386c86af389cc324f5] mm/memremap: reject unreasonable folio/compound page sizes in memremap_pages() git bisect good e24bb041cafabaa5fa3d76386c86af389cc324f5 # test job: [62fd63f4688f40f01a6df23225523ece10d4b69a] https://lava.sirena.org.uk/scheduler/job/1781975 # bad: [62fd63f4688f40f01a6df23225523ece10d4b69a] dma-remap: drop nth_page() in dma_common_contiguous_remap() git bisect bad 62fd63f4688f40f01a6df23225523ece10d4b69a # test job: [cb42f7f6d9e4eff4e5259cddf82fd913306b8fe7] https://lava.sirena.org.uk/scheduler/job/1782145 # good: [cb42f7f6d9e4eff4e5259cddf82fd913306b8fe7] fs: hugetlbfs: remove nth_page() usage within folio in 
adjust_range_hwpoison() git bisect good cb42f7f6d9e4eff4e5259cddf82fd913306b8fe7 # test job: [db076b5db550aa34169dceee81d0974c7b2a2482] https://lava.sirena.org.uk/scheduler/job/1782813 # bad: [db076b5db550aa34169dceee81d0974c7b2a2482] mm/gup: remove record_subpages() git bisect bad db076b5db550aa34169dceee81d0974c7b2a2482 # test job: [891d0b3189945a5c37ce92c4e5337ec2c17b6378] https://lava.sirena.org.uk/scheduler/job/1782916 # good: [891d0b3189945a5c37ce92c4e5337ec2c17b6378] mm/pagewalk: drop nth_page() usage within folio in folio_walk_start() git bisect good 891d0b3189945a5c37ce92c4e5337ec2c17b6378 # test job: [21999f6315d786cbd21d5b2d0ad56f3f6125279f] https://lava.sirena.org.uk/scheduler/job/1783020 # good: [21999f6315d786cbd21d5b2d0ad56f3f6125279f] mm/gup: drop nth_page() usage within folio when recording subpages git bisect good 21999f6315d786cbd21d5b2d0ad56f3f6125279f # first bad commit: [db076b5db550aa34169dceee81d0974c7b2a2482] mm/gup: remove record_subpages()
On 08.09.25 17:16, Mark Brown wrote: > On Mon, Sep 01, 2025 at 05:03:40PM +0200, David Hildenbrand wrote: >> We can just cleanup the code by calculating the #refs earlier, >> so we can just inline what remains of record_subpages(). >> >> Calculate the number of references/pages ahead of times, and record them >> only once all our tests passed. > > I'm seeing failures in kselftest-mm in -next on at least Raspberry Pi 4 > and Orion O6 which bisect to this patch. I'm seeing a NULL pointer > dereference during the GUP test (which isn't actually doing anything as > I'm just using a standard defconfig rather than one with the mm > fragment): On which -next label are you on? next-20250908 should no longer have that commit. -- Cheers David / dhildenb
On Mon, Sep 08, 2025 at 05:22:24PM +0200, David Hildenbrand wrote: > On 08.09.25 17:16, Mark Brown wrote: > > I'm seeing failures in kselftest-mm in -next on at least Raspberry Pi 4 > > and Orion O6 which bisect to this patch. I'm seeing a NULL pointer > On which -next label are you on? next-20250908 should no longer have that > commit. Ah, sorry - it was Friday's -next but I only saw the report this morning. Sorry for the noise.
On 9/1/25 8:03 AM, David Hildenbrand wrote: > We can just cleanup the code by calculating the #refs earlier, > so we can just inline what remains of record_subpages(). > > Calculate the number of references/pages ahead of times, and record them > only once all our tests passed. > > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > mm/gup.c | 25 ++++++++----------------- > 1 file changed, 8 insertions(+), 17 deletions(-) > > diff --git a/mm/gup.c b/mm/gup.c > index c10cd969c1a3b..f0f4d1a68e094 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -484,19 +484,6 @@ static inline void mm_set_has_pinned_flag(struct mm_struct *mm) > #ifdef CONFIG_MMU > > #ifdef CONFIG_HAVE_GUP_FAST > -static int record_subpages(struct page *page, unsigned long sz, > - unsigned long addr, unsigned long end, > - struct page **pages) > -{ > - int nr; > - > - page += (addr & (sz - 1)) >> PAGE_SHIFT; > - for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) > - pages[nr] = page++; > - > - return nr; > -} > - > /** > * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. 
> * @page: pointer to page to be grabbed > @@ -2967,8 +2954,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > if (pmd_special(orig)) > return 0; > > - page = pmd_page(orig); > - refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); > + refs = (end - addr) >> PAGE_SHIFT; > + page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); > > folio = try_grab_folio_fast(page, refs, flags); > if (!folio) > @@ -2989,6 +2976,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > } > > *nr += refs; > + for (; refs; refs--) > + *(pages++) = page++; > folio_set_referenced(folio); > return 1; > } > @@ -3007,8 +2996,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > if (pud_special(orig)) > return 0; > > - page = pud_page(orig); > - refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); > + refs = (end - addr) >> PAGE_SHIFT; > + page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); > > folio = try_grab_folio_fast(page, refs, flags); > if (!folio) > @@ -3030,6 +3019,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > } > > *nr += refs; > + for (; refs; refs--) > + *(pages++) = page++; Hi David, Probably a similar sentiment as Lorenzo here...the above diffs make the code *worse* to read. In fact, I recall adding record_subpages() here long ago, specifically to help clarify what was going on. Now it's been returned to it's original, cryptic form. Just my take on it, for whatever that's worth. :) thanks, -- John Hubbard > folio_set_referenced(folio); > return 1; > }
On 06.09.25 03:05, John Hubbard wrote: > On 9/1/25 8:03 AM, David Hildenbrand wrote: >> We can just cleanup the code by calculating the #refs earlier, >> so we can just inline what remains of record_subpages(). >> >> Calculate the number of references/pages ahead of times, and record them >> only once all our tests passed. >> >> Signed-off-by: David Hildenbrand <david@redhat.com> >> --- >> mm/gup.c | 25 ++++++++----------------- >> 1 file changed, 8 insertions(+), 17 deletions(-) >> >> diff --git a/mm/gup.c b/mm/gup.c >> index c10cd969c1a3b..f0f4d1a68e094 100644 >> --- a/mm/gup.c >> +++ b/mm/gup.c >> @@ -484,19 +484,6 @@ static inline void mm_set_has_pinned_flag(struct mm_struct *mm) >> #ifdef CONFIG_MMU >> >> #ifdef CONFIG_HAVE_GUP_FAST >> -static int record_subpages(struct page *page, unsigned long sz, >> - unsigned long addr, unsigned long end, >> - struct page **pages) >> -{ >> - int nr; >> - >> - page += (addr & (sz - 1)) >> PAGE_SHIFT; >> - for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) >> - pages[nr] = page++; >> - >> - return nr; >> -} >> - >> /** >> * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. 
>> * @page: pointer to page to be grabbed >> @@ -2967,8 +2954,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, >> if (pmd_special(orig)) >> return 0; >> >> - page = pmd_page(orig); >> - refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); >> + refs = (end - addr) >> PAGE_SHIFT; >> + page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); >> >> folio = try_grab_folio_fast(page, refs, flags); >> if (!folio) >> @@ -2989,6 +2976,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, >> } >> >> *nr += refs; >> + for (; refs; refs--) >> + *(pages++) = page++; >> folio_set_referenced(folio); >> return 1; >> } >> @@ -3007,8 +2996,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, >> if (pud_special(orig)) >> return 0; >> >> - page = pud_page(orig); >> - refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); >> + refs = (end - addr) >> PAGE_SHIFT; >> + page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); >> >> folio = try_grab_folio_fast(page, refs, flags); >> if (!folio) >> @@ -3030,6 +3019,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, >> } >> >> *nr += refs; >> + for (; refs; refs--) >> + *(pages++) = page++; > > Hi David, Hi! > > Probably a similar sentiment as Lorenzo here...the above diffs make the code > *worse* to read. In fact, I recall adding record_subpages() here long ago, > specifically to help clarify what was going on. Well, there is a lot I dislike about record_subpages() to go back there. Starting with "as Willy keeps explaining, the concept of subpages do not exist and ending with "why do we fill out the array even on failure". :) > > Now it's been returned to it's original, cryptic form. > The code in the caller was so uncryptic that both me and Lorenzo missed that magical addition. :P > Just my take on it, for whatever that's worth. :) As always, appreciated. 
I could of course keep the simple loop in some "record_folio_pages" function and clean up what I dislike about record_subpages(). But I much rather want the call chain to be cleaned up instead, if possible. Roughly, what I am thinking (limiting it to pte+pmd case) about is the following: From d6d6d21dbf435d8030782a627175e36e6c7b2dfb Mon Sep 17 00:00:00 2001 From: David Hildenbrand <david@redhat.com> Date: Sat, 6 Sep 2025 08:33:42 +0200 Subject: [PATCH] tmp Signed-off-by: David Hildenbrand <david@redhat.com> --- mm/gup.c | 79 ++++++++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 43 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 22420f2069ee1..98907ead749c0 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2845,12 +2845,11 @@ static void __maybe_unused gup_fast_undo_dev_pagemap(int *nr, int nr_start, * also check pmd here to make sure pmd doesn't change (corresponds to * pmdp_collapse_flush() in the THP collapse code path). */ -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, - unsigned long end, unsigned int flags, struct page **pages, - int *nr) +static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, + unsigned long end, unsigned int flags, struct page **pages) { struct dev_pagemap *pgmap = NULL; - int ret = 0; + unsigned long nr_pages = 0; pte_t *ptep, *ptem; ptem = ptep = pte_offset_map(&pmd, addr); @@ -2908,24 +2907,20 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, * details. 
*/ if (flags & FOLL_PIN) { - ret = arch_make_folio_accessible(folio); - if (ret) { + if (arch_make_folio_accessible(folio)) { gup_put_folio(folio, 1, flags); goto pte_unmap; } } folio_set_referenced(folio); - pages[*nr] = page; - (*nr)++; + pages[nr_pages++] = page; } while (ptep++, addr += PAGE_SIZE, addr != end); - ret = 1; - pte_unmap: if (pgmap) put_dev_pagemap(pgmap); pte_unmap(ptem); - return ret; + return nr_pages; } #else @@ -2938,21 +2933,24 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, * get_user_pages_fast_only implementation that can pin pages. Thus it's still * useful to have gup_fast_pmd_leaf even if we can't operate on ptes. */ -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, - unsigned long end, unsigned int flags, struct page **pages, - int *nr) +static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, + unsigned long end, unsigned int flags, struct page **pages) { return 0; } #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ -static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, - unsigned long end, unsigned int flags, struct page **pages, - int *nr) +static unsigned long gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, + unsigned long end, unsigned int flags, struct page **pages) { + const unsigned long nr_pages = (end - addr) >> PAGE_SHIFT; struct page *page; struct folio *folio; - int refs; + unsigned long i; + + /* See gup_fast_pte_range() */ + if (pmd_protnone(orig)) + return 0; if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) return 0; @@ -2960,33 +2958,30 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, if (pmd_special(orig)) return 0; - refs = (end - addr) >> PAGE_SHIFT; page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - folio = try_grab_folio_fast(page, refs, flags); + folio = try_grab_folio_fast(page, nr_pages, flags); if (!folio) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { 
- gup_put_folio(folio, refs, flags); + gup_put_folio(folio, nr_pages, flags); return 0; } if (!gup_fast_folio_allowed(folio, flags)) { - gup_put_folio(folio, refs, flags); + gup_put_folio(folio, nr_pages, flags); return 0; } if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { - gup_put_folio(folio, refs, flags); + gup_put_folio(folio, nr_pages, flags); return 0; } - pages += *nr; - *nr += refs; - for (; refs; refs--) + for (i = 0; i < nr_pages; i++) *(pages++) = page++; folio_set_referenced(folio); - return 1; + return nr_pages; } static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, @@ -3033,11 +3028,11 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, return 1; } -static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, - unsigned long end, unsigned int flags, struct page **pages, - int *nr) +static unsigned long gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, + unsigned long end, unsigned int flags, struct page **pages) { - unsigned long next; + unsigned long cur_nr_pages, next; + unsigned long nr_pages = 0; pmd_t *pmdp; pmdp = pmd_offset_lockless(pudp, pud, addr); @@ -3046,23 +3041,21 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, next = pmd_addr_end(addr, end); if (!pmd_present(pmd)) - return 0; + break; - if (unlikely(pmd_leaf(pmd))) { - /* See gup_fast_pte_range() */ - if (pmd_protnone(pmd)) - return 0; + if (unlikely(pmd_leaf(pmd))) + cur_nr_pages = gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags, pages); + else + cur_nr_pages = gup_fast_pte_range(pmd, pmdp, addr, next, flags, pages); - if (!gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags, - pages, nr)) - return 0; + nr_pages += cur_nr_pages; + pages += cur_nr_pages; - } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags, - pages, nr)) - return 0; + if (nr_pages != (next - addr) >> PAGE_SIZE) + break; } while (pmdp++, addr = next, addr != end); - return 1; + return nr_pages; } 
static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, -- 2.50.1 Oh, I might even have found a bug moving away from that questionable "ret==1 means success" handling in gup_fast_pte_range()? Will have to double-check, but likely the following is the right thing to do. From 8f48b25ef93e7ef98611fd58ec89384ad5171782 Mon Sep 17 00:00:00 2001 From: David Hildenbrand <david@redhat.com> Date: Sat, 6 Sep 2025 08:46:45 +0200 Subject: [PATCH] mm/gup: fix handling of errors from arch_make_folio_accessible() in follow_page_pte() In case we call arch_make_folio_accessible() and it fails, we would incorrectly return a value that is "!= 0" to the caller, indicating that we pinned all requested pages and that the caller can keep going. follow_page_pte() is not supposed to return error values, but instead 0 on failure and 1 on success. That is of course wrong, because the caller will just keep going pinning more pages. If we happen to pin a page afterwards, we're in trouble, because we essentially skipped some pages. Fixes: f28d43636d6f ("mm/gup/writeback: add callbacks for inaccessible pages") Signed-off-by: David Hildenbrand <david@redhat.com> --- mm/gup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 22420f2069ee1..cff226ec0ee7d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2908,8 +2908,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, * details. */ if (flags & FOLL_PIN) { - ret = arch_make_folio_accessible(folio); - if (ret) { + if (arch_make_folio_accessible(folio)) { gup_put_folio(folio, 1, flags); goto pte_unmap; } -- 2.50.1 -- Cheers David / dhildenb
On Sat, Sep 06, 2025 at 08:56:48AM +0200, David Hildenbrand wrote: > On 06.09.25 03:05, John Hubbard wrote: > > > > Probably a similar sentiment as Lorenzo here...the above diffs make the code > > *worse* to read. In fact, I recall adding record_subpages() here long ago, > > specifically to help clarify what was going on. > > Well, there is a lot I dislike about record_subpages() to go back there. > Starting with "as Willy keeps explaining, the concept of subpages do > not exist and ending with "why do we fill out the array even on failure". Yes > > :) > > > > > Now it's been returned to it's original, cryptic form. > > > > The code in the caller was so uncryptic that both me and Lorenzo missed > that magical addition. :P :'( > > > Just my take on it, for whatever that's worth. :) > > As always, appreciated. > > I could of course keep the simple loop in some "record_folio_pages" > function and clean up what I dislike about record_subpages(). > > But I much rather want the call chain to be cleaned up instead, if possible. > > > Roughly, what I am thinking (limiting it to pte+pmd case) about is the following: I cannot get the below to apply even with the original patch here applied + fix. It looks like (in mm-new :) commit e73f43a66d5f ("mm/gup: remove dead pgmap refcounting code") by Alastair has conflicted here, but even then I can't make it apply, with/without your fix...! 
> > > From d6d6d21dbf435d8030782a627175e36e6c7b2dfb Mon Sep 17 00:00:00 2001 > From: David Hildenbrand <david@redhat.com> > Date: Sat, 6 Sep 2025 08:33:42 +0200 > Subject: [PATCH] tmp > > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > mm/gup.c | 79 ++++++++++++++++++++++++++------------------------------ > 1 file changed, 36 insertions(+), 43 deletions(-) > > diff --git a/mm/gup.c b/mm/gup.c > index 22420f2069ee1..98907ead749c0 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -2845,12 +2845,11 @@ static void __maybe_unused gup_fast_undo_dev_pagemap(int *nr, int nr_start, > * also check pmd here to make sure pmd doesn't change (corresponds to > * pmdp_collapse_flush() in the THP collapse code path). > */ > -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > struct dev_pagemap *pgmap = NULL; > - int ret = 0; > + unsigned long nr_pages = 0; > pte_t *ptep, *ptem; > ptem = ptep = pte_offset_map(&pmd, addr); > @@ -2908,24 +2907,20 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > * details. > */ > if (flags & FOLL_PIN) { > - ret = arch_make_folio_accessible(folio); > - if (ret) { > + if (arch_make_folio_accessible(folio)) { > gup_put_folio(folio, 1, flags); > goto pte_unmap; > } > } > folio_set_referenced(folio); > - pages[*nr] = page; > - (*nr)++; > + pages[nr_pages++] = page; > } while (ptep++, addr += PAGE_SIZE, addr != end); > - ret = 1; > - > pte_unmap: > if (pgmap) > put_dev_pagemap(pgmap); > pte_unmap(ptem); > - return ret; > + return nr_pages; > } > #else > @@ -2938,21 +2933,24 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > * get_user_pages_fast_only implementation that can pin pages. 
Thus it's still > * useful to have gup_fast_pmd_leaf even if we can't operate on ptes. > */ > -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > return 0; > } > #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ > -static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > + const unsigned long nr_pages = (end - addr) >> PAGE_SHIFT; > struct page *page; > struct folio *folio; > - int refs; > + unsigned long i; > + > + /* See gup_fast_pte_range() */ > + if (pmd_protnone(orig)) > + return 0; > if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) > return 0; > @@ -2960,33 +2958,30 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > if (pmd_special(orig)) > return 0; > - refs = (end - addr) >> PAGE_SHIFT; > page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); > - folio = try_grab_folio_fast(page, refs, flags); > + folio = try_grab_folio_fast(page, nr_pages, flags); > if (!folio) > return 0; > if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { > - gup_put_folio(folio, refs, flags); > + gup_put_folio(folio, nr_pages, flags); > return 0; > } > if (!gup_fast_folio_allowed(folio, flags)) { > - gup_put_folio(folio, refs, flags); > + gup_put_folio(folio, nr_pages, flags); > return 0; > } > if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { > - gup_put_folio(folio, refs, flags); > + gup_put_folio(folio, nr_pages, flags); > return 0; > } > - pages += *nr; > - *nr += refs; > - for (; refs; refs--) > + for (i = 0; i < nr_pages; i++) > 
*(pages++) = page++; > folio_set_referenced(folio); > - return 1; > + return nr_pages; > } > static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > @@ -3033,11 +3028,11 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > return 1; > } > -static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > - unsigned long next; > + unsigned long cur_nr_pages, next; > + unsigned long nr_pages = 0; > pmd_t *pmdp; > pmdp = pmd_offset_lockless(pudp, pud, addr); > @@ -3046,23 +3041,21 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, > next = pmd_addr_end(addr, end); > if (!pmd_present(pmd)) > - return 0; > + break; > - if (unlikely(pmd_leaf(pmd))) { > - /* See gup_fast_pte_range() */ > - if (pmd_protnone(pmd)) > - return 0; > + if (unlikely(pmd_leaf(pmd))) > + cur_nr_pages = gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags, pages); > + else > + cur_nr_pages = gup_fast_pte_range(pmd, pmdp, addr, next, flags, pages); > - if (!gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags, > - pages, nr)) > - return 0; > + nr_pages += cur_nr_pages; > + pages += cur_nr_pages; > - } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags, > - pages, nr)) > - return 0; > + if (nr_pages != (next - addr) >> PAGE_SIZE) > + break; > } while (pmdp++, addr = next, addr != end); > - return 1; > + return nr_pages; > } > static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, OK I guess you intentionally left the rest as a TODO :) So I'll wait for you to post it before reviewing in-depth. This generally LGTM as an approach, getting rid of *nr is important that's really horrible. 
> -- > 2.50.1 > > > > Oh, I might even have found a bug moving away from that questionable > "ret==1 means success" handling in gup_fast_pte_range()? Will > have to double-check, but likely the following is the right thing to do. > > > > From 8f48b25ef93e7ef98611fd58ec89384ad5171782 Mon Sep 17 00:00:00 2001 > From: David Hildenbrand <david@redhat.com> > Date: Sat, 6 Sep 2025 08:46:45 +0200 > Subject: [PATCH] mm/gup: fix handling of errors from > arch_make_folio_accessible() in follow_page_pte() > > In case we call arch_make_folio_accessible() and it fails, we would > incorrectly return a value that is "!= 0" to the caller, indicating that > we pinned all requested pages and that the caller can keep going. > > follow_page_pte() is not supposed to return error values, but instead > 0 on failure and 1 on success. > > That is of course wrong, because the caller will just keep going pinning > more pages. If we happen to pin a page afterwards, we're in trouble, > because we essentially skipped some pages. > > Fixes: f28d43636d6f ("mm/gup/writeback: add callbacks for inaccessible pages") > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > mm/gup.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/mm/gup.c b/mm/gup.c > index 22420f2069ee1..cff226ec0ee7d 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -2908,8 +2908,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > * details. > */ > if (flags & FOLL_PIN) { > - ret = arch_make_folio_accessible(folio); > - if (ret) { > + if (arch_make_folio_accessible(folio)) { Oh Lord above. Lol. Yikes. Yeah I think your fix is valid... > gup_put_folio(folio, 1, flags); > goto pte_unmap; > } > -- > 2.50.1 > > > -- > Cheers > > David / dhildenb >
On 08.09.25 14:25, Lorenzo Stoakes wrote: > On Sat, Sep 06, 2025 at 08:56:48AM +0200, David Hildenbrand wrote: >> On 06.09.25 03:05, John Hubbard wrote: >>> >>> Probably a similar sentiment as Lorenzo here...the above diffs make the code >>> *worse* to read. In fact, I recall adding record_subpages() here long ago, >>> specifically to help clarify what was going on. >> >> Well, there is a lot I dislike about record_subpages() to go back there. >> Starting with "as Willy keeps explaining, the concept of subpages do >> not exist and ending with "why do we fill out the array even on failure". > > Yes > >> >> :) >> >>> >>> Now it's been returned to it's original, cryptic form. >>> >> >> The code in the caller was so uncryptic that both me and Lorenzo missed >> that magical addition. :P > > :'( > >> >>> Just my take on it, for whatever that's worth. :) >> >> As always, appreciated. >> >> I could of course keep the simple loop in some "record_folio_pages" >> function and clean up what I dislike about record_subpages(). >> >> But I much rather want the call chain to be cleaned up instead, if possible. >> >> >> Roughly, what I am thinking (limiting it to pte+pmd case) about is the following: > > I cannot get the below to apply even with the original patch here applied + fix. > > It looks like (in mm-new :) commit e73f43a66d5f ("mm/gup: remove dead pgmap > refcounting code") by Alastair has conflicted here, but even then I can't make > it apply, with/without your fix...! To be clear: it was never intended to be applied, because it wouldn't even compile in the current form. It was based on this nth_page submission + fix. [...] >> } >> static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, > > OK I guess you intentionally left the rest as a TODO :) > > So I'll wait for you to post it before reviewing in-depth. > > This generally LGTM as an approach, getting rid of *nr is important that's > really horrible. Yes. 
Expect a cleanup in that direction soonish (again, either from me or someone else I poke) > >> -- >> 2.50.1 >> >> >> >> Oh, I might even have found a bug moving away from that questionable >> "ret==1 means success" handling in gup_fast_pte_range()? Will >> have to double-check, but likely the following is the right thing to do. >> >> >> >> From 8f48b25ef93e7ef98611fd58ec89384ad5171782 Mon Sep 17 00:00:00 2001 >> From: David Hildenbrand <david@redhat.com> >> Date: Sat, 6 Sep 2025 08:46:45 +0200 >> Subject: [PATCH] mm/gup: fix handling of errors from >> arch_make_folio_accessible() in follow_page_pte() >> >> In case we call arch_make_folio_accessible() and it fails, we would >> incorrectly return a value that is "!= 0" to the caller, indicating that >> we pinned all requested pages and that the caller can keep going. >> >> follow_page_pte() is not supposed to return error values, but instead >> 0 on failure and 1 on success. >> >> That is of course wrong, because the caller will just keep going pinning >> more pages. If we happen to pin a page afterwards, we're in trouble, >> because we essentially skipped some pages. >> >> Fixes: f28d43636d6f ("mm/gup/writeback: add callbacks for inaccessible pages") >> Signed-off-by: David Hildenbrand <david@redhat.com> >> --- >> mm/gup.c | 3 +-- >> 1 file changed, 1 insertion(+), 2 deletions(-) >> >> diff --git a/mm/gup.c b/mm/gup.c >> index 22420f2069ee1..cff226ec0ee7d 100644 >> --- a/mm/gup.c >> +++ b/mm/gup.c >> @@ -2908,8 +2908,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, >> * details. >> */ >> if (flags & FOLL_PIN) { >> - ret = arch_make_folio_accessible(folio); >> - if (ret) { >> + if (arch_make_folio_accessible(folio)) { > > Oh Lord above. Lol. Yikes. > > Yeah I think your fix is valid... I sent it out earlier today. Fortunately that function shouldn't usually really fail IIUC. -- Cheers David / dhildenb
On 9/8/25 5:53 AM, David Hildenbrand wrote: > On 08.09.25 14:25, Lorenzo Stoakes wrote: >> On Sat, Sep 06, 2025 at 08:56:48AM +0200, David Hildenbrand wrote: >>> On 06.09.25 03:05, John Hubbard wrote: ... >>> Roughly, what I am thinking (limiting it to pte+pmd case) about is >>> the following: >> >> I cannot get the below to apply even with the original patch here >> applied + fix. >> >> It looks like (in mm-new :) commit e73f43a66d5f ("mm/gup: remove dead >> pgmap >> refcounting code") by Alastair has conflicted here, but even then I >> can't make >> it apply, with/without your fix...! I eventually resorted to telling the local AI to read the diffs and apply them on top of the nth_page series locally. :) Attaching the resulting patch, which worked well enough to at least see the proposal clearly. > > To be clear: it was never intended to be applied, because it wouldn't > even compile in the current form. > > It was based on this nth_page submission + fix. > > thanks, -- John Hubbard
On 9/5/25 11:56 PM, David Hildenbrand wrote: > On 06.09.25 03:05, John Hubbard wrote: >> On 9/1/25 8:03 AM, David Hildenbrand wrote: ...> Well, there is a lot I dislike about record_subpages() to go back there. > Starting with "as Willy keeps explaining, the concept of subpages do > not exist and ending with "why do we fill out the array even on failure". > > :) I am also very glad to see the entire concept of subpages disappear. >> >> Now it's been returned to it's original, cryptic form. >> > > The code in the caller was so uncryptic that both me and Lorenzo missed > that magical addition. :P > >> Just my take on it, for whatever that's worth. :) > > As always, appreciated. > > I could of course keep the simple loop in some "record_folio_pages" > function and clean up what I dislike about record_subpages(). > > But I much rather want the call chain to be cleaned up instead, if > possible. > Right! The primary way that record_subpages() helped was in showing what was going on: a function call helps a lot to self-document, sometimes. > > Roughly, what I am thinking (limiting it to pte+pmd case) about is the > following: The code below looks much cleaner, that's great! thanks, -- John Hubbard > > > From d6d6d21dbf435d8030782a627175e36e6c7b2dfb Mon Sep 17 00:00:00 2001 > From: David Hildenbrand <david@redhat.com> > Date: Sat, 6 Sep 2025 08:33:42 +0200 > Subject: [PATCH] tmp > > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > mm/gup.c | 79 ++++++++++++++++++++++++++------------------------------ > 1 file changed, 36 insertions(+), 43 deletions(-) > > diff --git a/mm/gup.c b/mm/gup.c > index 22420f2069ee1..98907ead749c0 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -2845,12 +2845,11 @@ static void __maybe_unused > gup_fast_undo_dev_pagemap(int *nr, int nr_start, > * also check pmd here to make sure pmd doesn't change (corresponds to > * pmdp_collapse_flush() in the THP collapse code path). 
> */ > -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, > unsigned long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > struct dev_pagemap *pgmap = NULL; > - int ret = 0; > + unsigned long nr_pages = 0; > pte_t *ptep, *ptem; > > ptem = ptep = pte_offset_map(&pmd, addr); > @@ -2908,24 +2907,20 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t > *pmdp, unsigned long addr, > * details. > */ > if (flags & FOLL_PIN) { > - ret = arch_make_folio_accessible(folio); > - if (ret) { > + if (arch_make_folio_accessible(folio)) { > gup_put_folio(folio, 1, flags); > goto pte_unmap; > } > } > folio_set_referenced(folio); > - pages[*nr] = page; > - (*nr)++; > + pages[nr_pages++] = page; > } while (ptep++, addr += PAGE_SIZE, addr != end); > > - ret = 1; > - > pte_unmap: > if (pgmap) > put_dev_pagemap(pgmap); > pte_unmap(ptem); > - return ret; > + return nr_pages; > } > #else > > @@ -2938,21 +2933,24 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t > *pmdp, unsigned long addr, > * get_user_pages_fast_only implementation that can pin pages. Thus > it's still > * useful to have gup_fast_pmd_leaf even if we can't operate on ptes. 
> */ > -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, > unsigned long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > return 0; > } > #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ > > -static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, > unsigned long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > + const unsigned long nr_pages = (end - addr) >> PAGE_SHIFT; > struct page *page; > struct folio *folio; > - int refs; > + unsigned long i; > + > + /* See gup_fast_pte_range() */ > + if (pmd_protnone(orig)) > + return 0; > > if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) > return 0; > @@ -2960,33 +2958,30 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t > *pmdp, unsigned long addr, > if (pmd_special(orig)) > return 0; > > - refs = (end - addr) >> PAGE_SHIFT; > page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); > > - folio = try_grab_folio_fast(page, refs, flags); > + folio = try_grab_folio_fast(page, nr_pages, flags); > if (!folio) > return 0; > > if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { > - gup_put_folio(folio, refs, flags); > + gup_put_folio(folio, nr_pages, flags); > return 0; > } > > if (!gup_fast_folio_allowed(folio, flags)) { > - gup_put_folio(folio, refs, flags); > + gup_put_folio(folio, nr_pages, flags); > return 0; > } > if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio- > >page)) { > - gup_put_folio(folio, refs, flags); > + gup_put_folio(folio, nr_pages, flags); > return 0; > } > > - pages += *nr; > - *nr += refs; > - for (; refs; refs--) > + for (i = 0; i < nr_pages; i++) > *(pages++) = page++; > folio_set_referenced(folio); > - return 1; > + 
return nr_pages; > } > > static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > @@ -3033,11 +3028,11 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t > *pudp, unsigned long addr, > return 1; > } > > -static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pmd_range(pud_t *pudp, pud_t pud, > unsigned long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > - unsigned long next; > + unsigned long cur_nr_pages, next; > + unsigned long nr_pages = 0; > pmd_t *pmdp; > > pmdp = pmd_offset_lockless(pudp, pud, addr); > @@ -3046,23 +3041,21 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t > pud, unsigned long addr, > > next = pmd_addr_end(addr, end); > if (!pmd_present(pmd)) > - return 0; > + break; > > - if (unlikely(pmd_leaf(pmd))) { > - /* See gup_fast_pte_range() */ > - if (pmd_protnone(pmd)) > - return 0; > + if (unlikely(pmd_leaf(pmd))) > + cur_nr_pages = gup_fast_pmd_leaf(pmd, pmdp, addr, next, > flags, pages); > + else > + cur_nr_pages = gup_fast_pte_range(pmd, pmdp, addr, next, > flags, pages); > > - if (!gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags, > - pages, nr)) > - return 0; > + nr_pages += cur_nr_pages; > + pages += cur_nr_pages; > > - } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags, > - pages, nr)) > - return 0; > + if (nr_pages != (next - addr) >> PAGE_SIZE) > + break; > } while (pmdp++, addr = next, addr != end); > > - return 1; > + return nr_pages; > } > > static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
>> Roughly, what I am thinking (limiting it to pte+pmd case) about is the >> following: > > The code below looks much cleaner, that's great! Great, I (or Aristeu if he has capacity) will clean this all up soon. -- Cheers David / dhildenb
> pmdp = pmd_offset_lockless(pudp, pud, addr); > @@ -3046,23 +3041,21 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, > > next = pmd_addr_end(addr, end); > if (!pmd_present(pmd)) > - return 0; > + break; > > - if (unlikely(pmd_leaf(pmd))) { > - /* See gup_fast_pte_range() */ > - if (pmd_protnone(pmd)) > - return 0; > + if (unlikely(pmd_leaf(pmd))) > + cur_nr_pages = gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags, pages); > + else > + cur_nr_pages = gup_fast_pte_range(pmd, pmdp, addr, next, flags, pages); > > - if (!gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags, > - pages, nr)) > - return 0; > + nr_pages += cur_nr_pages; > + pages += cur_nr_pages; > > - } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags, > - pages, nr)) > - return 0; > + if (nr_pages != (next - addr) >> PAGE_SIZE) > + break; ^ cur_nr_pages. Open for suggestions on how to make that thing here even better. -- Cheers David / dhildenb
On 01.09.25 17:03, David Hildenbrand wrote: > We can just cleanup the code by calculating the #refs earlier, > so we can just inline what remains of record_subpages(). > > Calculate the number of references/pages ahead of times, and record them > only once all our tests passed. > > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > mm/gup.c | 25 ++++++++----------------- > 1 file changed, 8 insertions(+), 17 deletions(-) > > diff --git a/mm/gup.c b/mm/gup.c > index c10cd969c1a3b..f0f4d1a68e094 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -484,19 +484,6 @@ static inline void mm_set_has_pinned_flag(struct mm_struct *mm) > #ifdef CONFIG_MMU > > #ifdef CONFIG_HAVE_GUP_FAST > -static int record_subpages(struct page *page, unsigned long sz, > - unsigned long addr, unsigned long end, > - struct page **pages) > -{ > - int nr; > - > - page += (addr & (sz - 1)) >> PAGE_SHIFT; > - for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) > - pages[nr] = page++; > - > - return nr; > -} > - > /** > * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. 
> * @page: pointer to page to be grabbed > @@ -2967,8 +2954,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > if (pmd_special(orig)) > return 0; > > - page = pmd_page(orig); > - refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); > + refs = (end - addr) >> PAGE_SHIFT; > + page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); > > folio = try_grab_folio_fast(page, refs, flags); > if (!folio) > @@ -2989,6 +2976,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > } > > *nr += refs; > + for (; refs; refs--) > + *(pages++) = page++; > folio_set_referenced(folio); > return 1; > } > @@ -3007,8 +2996,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > if (pud_special(orig)) > return 0; > > - page = pud_page(orig); > - refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); > + refs = (end - addr) >> PAGE_SHIFT; > + page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); > > folio = try_grab_folio_fast(page, refs, flags); > if (!folio) > @@ -3030,6 +3019,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > } > > *nr += refs; > + for (; refs; refs--) > + *(pages++) = page++; > folio_set_referenced(folio); > return 1; > } Okay, this code is nasty. We should rework this code to just return the nr and receive the proper pages pointer, getting rid of the "*nr" parameter. For the time being, the following should do the trick: commit bfd07c995814354f6b66c5b6a72e96a7aa9fb73b (HEAD -> nth_page) Author: David Hildenbrand <david@redhat.com> Date: Fri Sep 5 08:38:43 2025 +0200 fixup: mm/gup: remove record_subpages() pages is not adjusted by the caller, but indexed by existing *nr. 
Signed-off-by: David Hildenbrand <david@redhat.com> diff --git a/mm/gup.c b/mm/gup.c index 010fe56f6e132..22420f2069ee1 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2981,6 +2981,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, return 0; } + pages += *nr; *nr += refs; for (; refs; refs--) *(pages++) = page++; @@ -3024,6 +3025,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, return 0; } + pages += *nr; *nr += refs; for (; refs; refs--) *(pages++) = page++; -- Cheers David / dhildenb
On Fri, Sep 05, 2025 at 08:41:23AM +0200, David Hildenbrand wrote: > On 01.09.25 17:03, David Hildenbrand wrote: > > We can just cleanup the code by calculating the #refs earlier, > > so we can just inline what remains of record_subpages(). > > > > Calculate the number of references/pages ahead of times, and record them > > only once all our tests passed. > > > > Signed-off-by: David Hildenbrand <david@redhat.com> > > --- > > mm/gup.c | 25 ++++++++----------------- > > 1 file changed, 8 insertions(+), 17 deletions(-) > > > > diff --git a/mm/gup.c b/mm/gup.c > > index c10cd969c1a3b..f0f4d1a68e094 100644 > > --- a/mm/gup.c > > +++ b/mm/gup.c > > @@ -484,19 +484,6 @@ static inline void mm_set_has_pinned_flag(struct mm_struct *mm) > > #ifdef CONFIG_MMU > > #ifdef CONFIG_HAVE_GUP_FAST > > -static int record_subpages(struct page *page, unsigned long sz, > > - unsigned long addr, unsigned long end, > > - struct page **pages) > > -{ > > - int nr; > > - > > - page += (addr & (sz - 1)) >> PAGE_SHIFT; > > - for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) > > - pages[nr] = page++; > > - > > - return nr; > > -} > > - > > /** > > * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. 
> > * @page: pointer to page to be grabbed > > @@ -2967,8 +2954,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > > if (pmd_special(orig)) > > return 0; > > - page = pmd_page(orig); > > - refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); > > + refs = (end - addr) >> PAGE_SHIFT; > > + page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); > > folio = try_grab_folio_fast(page, refs, flags); > > if (!folio) > > @@ -2989,6 +2976,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > > } > > *nr += refs; > > + for (; refs; refs--) > > + *(pages++) = page++; > > folio_set_referenced(folio); > > return 1; > > } > > @@ -3007,8 +2996,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > > if (pud_special(orig)) > > return 0; > > - page = pud_page(orig); > > - refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); > > + refs = (end - addr) >> PAGE_SHIFT; > > + page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); > > folio = try_grab_folio_fast(page, refs, flags); > > if (!folio) > > @@ -3030,6 +3019,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > > } > > *nr += refs; > > + for (; refs; refs--) > > + *(pages++) = page++; > > folio_set_referenced(folio); > > return 1; > > } > > Okay, this code is nasty. We should rework this code to just return the nr and receive a the proper > pages pointer, getting rid of the "*nr" parameter. > > For the time being, the following should do the trick: > > commit bfd07c995814354f6b66c5b6a72e96a7aa9fb73b (HEAD -> nth_page) > Author: David Hildenbrand <david@redhat.com> > Date: Fri Sep 5 08:38:43 2025 +0200 > > fixup: mm/gup: remove record_subpages() > pages is not adjusted by the caller, but idnexed by existing *nr. 
> Signed-off-by: David Hildenbrand <david@redhat.com> > > diff --git a/mm/gup.c b/mm/gup.c > index 010fe56f6e132..22420f2069ee1 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -2981,6 +2981,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > return 0; > } > + pages += *nr; > *nr += refs; > for (; refs; refs--) > *(pages++) = page++; > @@ -3024,6 +3025,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > return 0; > } > + pages += *nr; > *nr += refs; > for (; refs; refs--) > *(pages++) = page++; Can this get folded in soon? This bug is causing crashes in AF_ALG too. Thanks, - Eric
On 06.09.25 01:00, Eric Biggers wrote: > On Fri, Sep 05, 2025 at 08:41:23AM +0200, David Hildenbrand wrote: >> On 01.09.25 17:03, David Hildenbrand wrote: >>> We can just cleanup the code by calculating the #refs earlier, >>> so we can just inline what remains of record_subpages(). >>> >>> Calculate the number of references/pages ahead of times, and record them >>> only once all our tests passed. >>> >>> Signed-off-by: David Hildenbrand <david@redhat.com> >>> --- >>> mm/gup.c | 25 ++++++++----------------- >>> 1 file changed, 8 insertions(+), 17 deletions(-) >>> >>> diff --git a/mm/gup.c b/mm/gup.c >>> index c10cd969c1a3b..f0f4d1a68e094 100644 >>> --- a/mm/gup.c >>> +++ b/mm/gup.c >>> @@ -484,19 +484,6 @@ static inline void mm_set_has_pinned_flag(struct mm_struct *mm) >>> #ifdef CONFIG_MMU >>> #ifdef CONFIG_HAVE_GUP_FAST >>> -static int record_subpages(struct page *page, unsigned long sz, >>> - unsigned long addr, unsigned long end, >>> - struct page **pages) >>> -{ >>> - int nr; >>> - >>> - page += (addr & (sz - 1)) >> PAGE_SHIFT; >>> - for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) >>> - pages[nr] = page++; >>> - >>> - return nr; >>> -} >>> - >>> /** >>> * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. 
>>> * @page: pointer to page to be grabbed >>> @@ -2967,8 +2954,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, >>> if (pmd_special(orig)) >>> return 0; >>> - page = pmd_page(orig); >>> - refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); >>> + refs = (end - addr) >> PAGE_SHIFT; >>> + page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); >>> folio = try_grab_folio_fast(page, refs, flags); >>> if (!folio) >>> @@ -2989,6 +2976,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, >>> } >>> *nr += refs; >>> + for (; refs; refs--) >>> + *(pages++) = page++; >>> folio_set_referenced(folio); >>> return 1; >>> } >>> @@ -3007,8 +2996,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, >>> if (pud_special(orig)) >>> return 0; >>> - page = pud_page(orig); >>> - refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); >>> + refs = (end - addr) >> PAGE_SHIFT; >>> + page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); >>> folio = try_grab_folio_fast(page, refs, flags); >>> if (!folio) >>> @@ -3030,6 +3019,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, >>> } >>> *nr += refs; >>> + for (; refs; refs--) >>> + *(pages++) = page++; >>> folio_set_referenced(folio); >>> return 1; >>> } >> >> Okay, this code is nasty. We should rework this code to just return the nr and receive a the proper >> pages pointer, getting rid of the "*nr" parameter. >> >> For the time being, the following should do the trick: >> >> commit bfd07c995814354f6b66c5b6a72e96a7aa9fb73b (HEAD -> nth_page) >> Author: David Hildenbrand <david@redhat.com> >> Date: Fri Sep 5 08:38:43 2025 +0200 >> >> fixup: mm/gup: remove record_subpages() >> pages is not adjusted by the caller, but idnexed by existing *nr. 
>> Signed-off-by: David Hildenbrand <david@redhat.com> >> >> diff --git a/mm/gup.c b/mm/gup.c >> index 010fe56f6e132..22420f2069ee1 100644 >> --- a/mm/gup.c >> +++ b/mm/gup.c >> @@ -2981,6 +2981,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, >> return 0; >> } >> + pages += *nr; >> *nr += refs; >> for (; refs; refs--) >> *(pages++) = page++; >> @@ -3024,6 +3025,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, >> return 0; >> } >> + pages += *nr; >> *nr += refs; >> for (; refs; refs--) >> *(pages++) = page++; > > Can this get folded in soon? This bug is causing crashes in AF_ALG too. Andrew immediately dropped the original patch, so it's gone from mm-unstable and should be gone from next soon (today?). -- Cheers David / dhildenb
On Sat, 6 Sep 2025 08:57:37 +0200 David Hildenbrand <david@redhat.com> wrote: > >> @@ -3024,6 +3025,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > >> return 0; > >> } > >> + pages += *nr; > >> *nr += refs; > >> for (; refs; refs--) > >> *(pages++) = page++; > > > > Can this get folded in soon? This bug is causing crashes in AF_ALG too. > > Andrew immediately dropped the original patch, so it's gone from > mm-unstable and should be gone from next soon (today?). I restored it once you sent out the fix. It doesn't seem to be in present -next but it should be there in the next one.
On Fri, Sep 05, 2025 at 08:41:23AM +0200, David Hildenbrand wrote: > On 01.09.25 17:03, David Hildenbrand wrote: > > We can just cleanup the code by calculating the #refs earlier, > > so we can just inline what remains of record_subpages(). > > > > Calculate the number of references/pages ahead of times, and record them > > only once all our tests passed. > > > > Signed-off-by: David Hildenbrand <david@redhat.com> So strange I thought I looked at this...! > > --- > > mm/gup.c | 25 ++++++++----------------- > > 1 file changed, 8 insertions(+), 17 deletions(-) > > > > diff --git a/mm/gup.c b/mm/gup.c > > index c10cd969c1a3b..f0f4d1a68e094 100644 > > --- a/mm/gup.c > > +++ b/mm/gup.c > > @@ -484,19 +484,6 @@ static inline void mm_set_has_pinned_flag(struct mm_struct *mm) > > #ifdef CONFIG_MMU > > #ifdef CONFIG_HAVE_GUP_FAST > > -static int record_subpages(struct page *page, unsigned long sz, > > - unsigned long addr, unsigned long end, > > - struct page **pages) > > -{ > > - int nr; > > - > > - page += (addr & (sz - 1)) >> PAGE_SHIFT; > > - for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) > > - pages[nr] = page++; > > - > > - return nr; > > -} > > - > > /** > > * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. 
> > * @page: pointer to page to be grabbed > > @@ -2967,8 +2954,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > > if (pmd_special(orig)) > > return 0; > > - page = pmd_page(orig); > > - refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); > > + refs = (end - addr) >> PAGE_SHIFT; > > + page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); > > folio = try_grab_folio_fast(page, refs, flags); > > if (!folio) > > @@ -2989,6 +2976,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > > } > > *nr += refs; > > + for (; refs; refs--) > > + *(pages++) = page++; > > folio_set_referenced(folio); > > return 1; > > } > > @@ -3007,8 +2996,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > > if (pud_special(orig)) > > return 0; > > - page = pud_page(orig); > > - refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); > > + refs = (end - addr) >> PAGE_SHIFT; > > + page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); > > folio = try_grab_folio_fast(page, refs, flags); > > if (!folio) > > @@ -3030,6 +3019,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > > } > > *nr += refs; > > + for (; refs; refs--) > > + *(pages++) = page++; > > folio_set_referenced(folio); > > return 1; > > } > > Okay, this code is nasty. We should rework this code to just return the nr and receive a the proper > pages pointer, getting rid of the "*nr" parameter. > > For the time being, the following should do the trick: > > commit bfd07c995814354f6b66c5b6a72e96a7aa9fb73b (HEAD -> nth_page) > Author: David Hildenbrand <david@redhat.com> > Date: Fri Sep 5 08:38:43 2025 +0200 > > fixup: mm/gup: remove record_subpages() > pages is not adjusted by the caller, but idnexed by existing *nr. 
> Signed-off-by: David Hildenbrand <david@redhat.com> > > diff --git a/mm/gup.c b/mm/gup.c > index 010fe56f6e132..22420f2069ee1 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -2981,6 +2981,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > return 0; > } > + pages += *nr; > *nr += refs; > for (; refs; refs--) > *(pages++) = page++; > @@ -3024,6 +3025,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > return 0; > } > + pages += *nr; > *nr += refs; > for (; refs; refs--) > *(pages++) = page++; This looks correct. But. This is VERY nasty. Before we'd call record_subpages() with pages + *nr, where it was clear we were offsetting by this, now we're making things imo way more confusing. This makes me less in love with this approach to be honest. But perhaps it's the least worst thing for now until we can do a bigger refactor... So since this seems correct to me, and for the sake of moving things forward (was this one patch dropped from mm-new or does mm-new just have an old version? Confused): Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> For this patch obviously with the fix applied. But can we PLEASE revisit this :) > > > -- > > Cheers > > David / dhildenb > Cheers, Lorenzo
On 05.09.25 13:34, Lorenzo Stoakes wrote: > On Fri, Sep 05, 2025 at 08:41:23AM +0200, David Hildenbrand wrote: >> On 01.09.25 17:03, David Hildenbrand wrote: >>> We can just cleanup the code by calculating the #refs earlier, >>> so we can just inline what remains of record_subpages(). >>> >>> Calculate the number of references/pages ahead of times, and record them >>> only once all our tests passed. >>> >>> Signed-off-by: David Hildenbrand <david@redhat.com> > > So strange I thought I looked at this...! > >>> --- >>> mm/gup.c | 25 ++++++++----------------- >>> 1 file changed, 8 insertions(+), 17 deletions(-) >>> >>> diff --git a/mm/gup.c b/mm/gup.c >>> index c10cd969c1a3b..f0f4d1a68e094 100644 >>> --- a/mm/gup.c >>> +++ b/mm/gup.c >>> @@ -484,19 +484,6 @@ static inline void mm_set_has_pinned_flag(struct mm_struct *mm) >>> #ifdef CONFIG_MMU >>> #ifdef CONFIG_HAVE_GUP_FAST >>> -static int record_subpages(struct page *page, unsigned long sz, >>> - unsigned long addr, unsigned long end, >>> - struct page **pages) >>> -{ >>> - int nr; >>> - >>> - page += (addr & (sz - 1)) >> PAGE_SHIFT; >>> - for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) >>> - pages[nr] = page++; >>> - >>> - return nr; >>> -} >>> - >>> /** >>> * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. 
>>> * @page: pointer to page to be grabbed >>> @@ -2967,8 +2954,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, >>> if (pmd_special(orig)) >>> return 0; >>> - page = pmd_page(orig); >>> - refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); >>> + refs = (end - addr) >> PAGE_SHIFT; >>> + page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); >>> folio = try_grab_folio_fast(page, refs, flags); >>> if (!folio) >>> @@ -2989,6 +2976,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, >>> } >>> *nr += refs; >>> + for (; refs; refs--) >>> + *(pages++) = page++; >>> folio_set_referenced(folio); >>> return 1; >>> } >>> @@ -3007,8 +2996,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, >>> if (pud_special(orig)) >>> return 0; >>> - page = pud_page(orig); >>> - refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); >>> + refs = (end - addr) >> PAGE_SHIFT; >>> + page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); >>> folio = try_grab_folio_fast(page, refs, flags); >>> if (!folio) >>> @@ -3030,6 +3019,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, >>> } >>> *nr += refs; >>> + for (; refs; refs--) >>> + *(pages++) = page++; >>> folio_set_referenced(folio); >>> return 1; >>> } >> >> Okay, this code is nasty. We should rework this code to just return the nr and receive a the proper >> pages pointer, getting rid of the "*nr" parameter. >> >> For the time being, the following should do the trick: >> >> commit bfd07c995814354f6b66c5b6a72e96a7aa9fb73b (HEAD -> nth_page) >> Author: David Hildenbrand <david@redhat.com> >> Date: Fri Sep 5 08:38:43 2025 +0200 >> >> fixup: mm/gup: remove record_subpages() >> pages is not adjusted by the caller, but idnexed by existing *nr. 
>> Signed-off-by: David Hildenbrand <david@redhat.com> >> >> diff --git a/mm/gup.c b/mm/gup.c >> index 010fe56f6e132..22420f2069ee1 100644 >> --- a/mm/gup.c >> +++ b/mm/gup.c >> @@ -2981,6 +2981,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, >> return 0; >> } >> + pages += *nr; >> *nr += refs; >> for (; refs; refs--) >> *(pages++) = page++; >> @@ -3024,6 +3025,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, >> return 0; >> } >> + pages += *nr; >> *nr += refs; >> for (; refs; refs--) >> *(pages++) = page++; > > This looks correct. > > But. > > This is VERY nasty. Before we'd call record_subpages() with pages + *nr, where > it was clear we were offsetting by this, now we're making things imo way more > confusing. > > This makes me less in love with this approach to be honest. > > But perhaps it's the least worst thing for now until we can do a bigger > refactor... > > So since this seems correct to me, and for the sake of moving things forward > (was this one patch dropped from mm-new or does mm-new just have an old version? > Confused): > > Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > For this patch obviously with the fix applied. > > But can we PLEASE revisit this :) Yeah, I already asked someone internally if he would have time to do some refactorings in mm/gup.c. If that won't work out I shall do it at some point (and the same time reworking follow_page_mask() to just consume the array as well like gup does) -- Cheers David / dhildenb
On 9/5/25 12:41 AM, David Hildenbrand wrote: > On 01.09.25 17:03, David Hildenbrand wrote: >> We can just cleanup the code by calculating the #refs earlier, >> so we can just inline what remains of record_subpages(). >> >> Calculate the number of references/pages ahead of times, and record them >> only once all our tests passed. >> >> Signed-off-by: David Hildenbrand <david@redhat.com> >> --- >> mm/gup.c | 25 ++++++++----------------- >> 1 file changed, 8 insertions(+), 17 deletions(-) >> >> diff --git a/mm/gup.c b/mm/gup.c >> index c10cd969c1a3b..f0f4d1a68e094 100644 >> --- a/mm/gup.c >> +++ b/mm/gup.c >> @@ -484,19 +484,6 @@ static inline void mm_set_has_pinned_flag(struct mm_struct *mm) >> #ifdef CONFIG_MMU >> #ifdef CONFIG_HAVE_GUP_FAST >> -static int record_subpages(struct page *page, unsigned long sz, >> - unsigned long addr, unsigned long end, >> - struct page **pages) >> -{ >> - int nr; >> - >> - page += (addr & (sz - 1)) >> PAGE_SHIFT; >> - for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) >> - pages[nr] = page++; >> - >> - return nr; >> -} >> - >> /** >> * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. 
>> * @page: pointer to page to be grabbed >> @@ -2967,8 +2954,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, >> if (pmd_special(orig)) >> return 0; >> - page = pmd_page(orig); >> - refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); >> + refs = (end - addr) >> PAGE_SHIFT; >> + page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); >> folio = try_grab_folio_fast(page, refs, flags); >> if (!folio) >> @@ -2989,6 +2976,8 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, >> } >> *nr += refs; >> + for (; refs; refs--) >> + *(pages++) = page++; >> folio_set_referenced(folio); >> return 1; >> } >> @@ -3007,8 +2996,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, >> if (pud_special(orig)) >> return 0; >> - page = pud_page(orig); >> - refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); >> + refs = (end - addr) >> PAGE_SHIFT; >> + page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); >> folio = try_grab_folio_fast(page, refs, flags); >> if (!folio) >> @@ -3030,6 +3019,8 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, >> } >> *nr += refs; >> + for (; refs; refs--) >> + *(pages++) = page++; >> folio_set_referenced(folio); >> return 1; >> } > > Okay, this code is nasty. We should rework this code to just return the nr and receive a the proper > pages pointer, getting rid of the "*nr" parameter. > > For the time being, the following should do the trick: > > commit bfd07c995814354f6b66c5b6a72e96a7aa9fb73b (HEAD -> nth_page) > Author: David Hildenbrand <david@redhat.com> > Date: Fri Sep 5 08:38:43 2025 +0200 > > fixup: mm/gup: remove record_subpages() > pages is not adjusted by the caller, but idnexed by existing *nr. 
> Signed-off-by: David Hildenbrand <david@redhat.com> > > diff --git a/mm/gup.c b/mm/gup.c > index 010fe56f6e132..22420f2069ee1 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -2981,6 +2981,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > return 0; > } > > + pages += *nr; > *nr += refs; > for (; refs; refs--) > *(pages++) = page++; > @@ -3024,6 +3025,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > return 0; > } > > + pages += *nr; > *nr += refs; > for (; refs; refs--) > *(pages++) = page++; > Tested as fixing the issue for me, thanks. -- Jens Axboe
© 2016 - 2025 Red Hat, Inc.