From: Zi Yan <ziy@nvidia.com>
Since compaction code can compact >0 order folios, enable it during the
process.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
mm/compaction.c | 25 ++++++++++---------------
1 file changed, 10 insertions(+), 15 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 4300d877b824..f72af74094de 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1087,11 +1087,17 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (PageCompound(page) && !cc->alloc_contig) {
const unsigned int order = compound_order(page);
- if (likely(order <= MAX_ORDER)) {
- low_pfn += (1UL << order) - 1;
- nr_scanned += (1UL << order) - 1;
+ /*
+ * Compacting > pageblock_order pages does not improve
+ * memory fragmentation. Also skip hugetlbfs pages.
+ */
+ if (likely(order >= pageblock_order) || PageHuge(page)) {
+ if (order <= MAX_ORDER) {
+ low_pfn += (1UL << order) - 1;
+ nr_scanned += (1UL << order) - 1;
+ }
+ goto isolate_fail;
}
- goto isolate_fail;
}
/*
@@ -1214,17 +1220,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
goto isolate_abort;
}
}
-
- /*
- * folio become large since the non-locked check,
- * and it's on LRU.
- */
- if (unlikely(folio_test_large(folio) && !cc->alloc_contig)) {
- low_pfn += folio_nr_pages(folio) - 1;
- nr_scanned += folio_nr_pages(folio) - 1;
- folio_set_lru(folio);
- goto isolate_fail_put;
- }
}
/* The folio is taken off the LRU */
--
2.40.1
Hello,
kernel test robot noticed "kernel_BUG_at_lib/list_debug.c" on:
commit: 810d9ce367799ba4fef1e894b342e5ab74d44681 ("[RFC PATCH 4/4] mm/compaction: enable compacting >0 order folios.")
url: https://github.com/intel-lab-lkp/linux/commits/Zi-Yan/mm-compaction-add-support-for-0-order-folio-memory-compaction/20230913-003027
base: https://git.kernel.org/cgit/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/all/20230912162815.440749-5-zi.yan@sent.com/
patch subject: [RFC PATCH 4/4] mm/compaction: enable compacting >0 order folios.
in testcase: vm-scalability
version: vm-scalability-x86_64-1.0-0_20220518
with following parameters:
runtime: 300s
test: lru-file-readtwice
cpufreq_governor: performance
test-description: The motivation behind this suite is to exercise functions and regions of the mm/ of the Linux kernel which are of interest to us.
test-url: https://git.kernel.org/cgit/linux/kernel/git/wfg/vm-scalability.git/
compiler: gcc-12
test machine: 224 threads 2 sockets Intel(R) Xeon(R) Platinum 8480L (Sapphire Rapids) with 512G memory
(please refer to attached dmesg/kmsg for entire log/backtrace)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202309202236.2848083f-oliver.sang@intel.com
The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20230920/202309202236.2848083f-oliver.sang@intel.com
[ 104.256019][ T1493] list_del corruption, ffd40001e3611490->prev is NULL
[ 104.264911][ T1493] ------------[ cut here ]------------
[ 104.272315][ T1493] kernel BUG at lib/list_debug.c:54!
[ 104.279501][ T1493] invalid opcode: 0000 [#1] SMP NOPTI
[ 104.286658][ T1493] CPU: 91 PID: 1493 Comm: kcompactd1 Not tainted 6.6.0-rc1-00153-g810d9ce36779 #1
[ 104.298169][ T1493] Hardware name: NULL NULL/NULL, BIOS 05.02.01 05/12/2023
[ 104.307252][ T1493] RIP: 0010:__list_del_entry_valid_or_report+0x6e/0xf0
[ 104.315987][ T1493] Code: b8 01 00 00 00 c3 cc cc cc cc 48 89 fe 48 c7 c7 80 c1 71 82 e8 e3 37 a3 ff 0f 0b 48 89 fe 48 c7 c7 b0 c1 71 82 e8 d2 37 a3 ff <0f> 0b 48 89 fe 48 c7 c7 e0 c1 71 82 e8 c1 37 a3 ff 0f 0b 48 89 fe
[ 104.339068][ T1493] RSP: 0018:ffa0000010a37910 EFLAGS: 00010046
[ 104.346919][ T1493] RAX: 0000000000000033 RBX: ff110080749b5ab8 RCX: 0000000000000000
[ 104.356938][ T1493] RDX: 0000000000000000 RSI: ff11007f416dc6c0 RDI: ff11007f416dc6c0
[ 104.366914][ T1493] RBP: ff110040b00af858 R08: 0000000000000000 R09: ffa0000010a377b8
[ 104.376873][ T1493] R10: 0000000000000003 R11: ff11007f40dfffe8 R12: ffd40001e3611400
[ 104.386808][ T1493] R13: 0000000000000000 R14: ffd40001e3611400 R15: ffa0000010a37938
[ 104.396739][ T1493] FS: 0000000000000000(0000) GS:ff11007f416c0000(0000) knlGS:0000000000000000
[ 104.407739][ T1493] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 104.416072][ T1493] CR2: 000055a550b5eb38 CR3: 0000008069078004 CR4: 0000000000f71ee0
[ 104.425986][ T1493] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 104.435870][ T1493] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
[ 104.445790][ T1493] PKRU: 55555554
[ 104.450668][ T1493] Call Trace:
[ 104.455221][ T1493] <TASK>
[ 104.459360][ T1493] ? die+0x36/0xb0
[ 104.464363][ T1493] ? do_trap+0xda/0x130
[ 104.469839][ T1493] ? __list_del_entry_valid_or_report+0x6e/0xf0
[ 104.477666][ T1493] ? do_error_trap+0x65/0xb0
[ 104.483614][ T1493] ? __list_del_entry_valid_or_report+0x6e/0xf0
[ 104.491418][ T1493] ? exc_invalid_op+0x50/0x70
[ 104.497453][ T1493] ? __list_del_entry_valid_or_report+0x6e/0xf0
[ 104.505246][ T1493] ? asm_exc_invalid_op+0x1a/0x20
[ 104.511655][ T1493] ? __list_del_entry_valid_or_report+0x6e/0xf0
[ 104.519423][ T1493] split_huge_page_to_list+0x3ad/0x5b0
[ 104.526306][ T1493] migrate_pages_batch+0x1f6/0x970
[ 104.532797][ T1493] ? __pfx_compaction_alloc+0x10/0x10
[ 104.539564][ T1493] ? __pfx_compaction_free+0x10/0x10
[ 104.546219][ T1493] ? __pfx_compaction_alloc+0x10/0x10
[ 104.552955][ T1493] migrate_pages_sync+0x99/0x230
[ 104.559201][ T1493] ? __pfx_compaction_alloc+0x10/0x10
[ 104.565917][ T1493] ? __pfx_compaction_free+0x10/0x10
[ 104.572522][ T1493] migrate_pages+0x3d9/0x530
[ 104.578341][ T1493] ? __pfx_compaction_alloc+0x10/0x10
[ 104.585033][ T1493] ? __pfx_compaction_free+0x10/0x10
[ 104.591617][ T1493] compact_zone+0x286/0xa30
[ 104.597313][ T1493] kcompactd_do_work+0x103/0x2f0
[ 104.603487][ T1493] kcompactd+0x238/0x430
[ 104.608873][ T1493] ? __pfx_autoremove_wake_function+0x10/0x10
[ 104.616315][ T1493] ? __pfx_kcompactd+0x10/0x10
[ 104.622284][ T1493] kthread+0xcd/0x130
[ 104.627371][ T1493] ? __pfx_kthread+0x10/0x10
[ 104.633117][ T1493] ret_from_fork+0x31/0x70
[ 104.638664][ T1493] ? __pfx_kthread+0x10/0x10
[ 104.644390][ T1493] ret_from_fork_asm+0x1b/0x30
[ 104.650309][ T1493] </TASK>
[ 104.654264][ T1493] Modules linked in: xfs loop btrfs intel_rapl_msr blake2b_generic intel_rapl_common xor ses x86_pkg_temp_thermal enclosure raid6_pq sd_mod scsi_transport_sas intel_powerclamp libcrc32c sg coretemp crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel nvme sha512_ssse3 ahci nvme_core rapl ast ipmi_ssif libahci t10_pi mei_me intel_cstate drm_shmem_helper crc64_rocksoft_generic i2c_i801 crc64_rocksoft acpi_ipmi drm_kms_helper megaraid_sas joydev dax_hmem intel_uncore libata mei i2c_ismt crc64 i2c_smbus wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_pad acpi_power_meter drm fuse ip_tables
[ 104.717626][ T1493] ---[ end trace 0000000000000000 ]---
[ 104.807226][ T1493] RIP: 0010:__list_del_entry_valid_or_report+0x6e/0xf0
[ 104.815628][ T1493] Code: b8 01 00 00 00 c3 cc cc cc cc 48 89 fe 48 c7 c7 80 c1 71 82 e8 e3 37 a3 ff 0f 0b 48 89 fe 48 c7 c7 b0 c1 71 82 e8 d2 37 a3 ff <0f> 0b 48 89 fe 48 c7 c7 e0 c1 71 82 e8 c1 37 a3 ff 0f 0b 48 89 fe
[ 104.838334][ T1493] RSP: 0018:ffa0000010a37910 EFLAGS: 00010046
[ 104.845773][ T1493] RAX: 0000000000000033 RBX: ff110080749b5ab8 RCX: 0000000000000000
[ 104.855343][ T1493] RDX: 0000000000000000 RSI: ff11007f416dc6c0 RDI: ff11007f416dc6c0
[ 104.864898][ T1493] RBP: ff110040b00af858 R08: 0000000000000000 R09: ffa0000010a377b8
[ 104.874452][ T1493] R10: 0000000000000003 R11: ff11007f40dfffe8 R12: ffd40001e3611400
[ 104.884001][ T1493] R13: 0000000000000000 R14: ffd40001e3611400 R15: ffa0000010a37938
[ 104.893543][ T1493] FS: 0000000000000000(0000) GS:ff11007f416c0000(0000) knlGS:0000000000000000
[ 104.904152][ T1493] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 104.912119][ T1493] CR2: 000055a550b5eb38 CR3: 0000008069078004 CR4: 0000000000f71ee0
[ 104.921634][ T1493] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 104.931149][ T1493] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
[ 104.940655][ T1493] PKRU: 55555554
[ 104.945174][ T1493] Kernel panic - not syncing: Fatal exception
[ 105.991260][ T1493] Shutting down cpus with NMI
[ 106.046902][ T1493] Kernel Offset: disabled
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On 9/13/2023 12:28 AM, Zi Yan wrote:
> From: Zi Yan <ziy@nvidia.com>
>
> Since compaction code can compact >0 order folios, enable it during the
> process.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> mm/compaction.c | 25 ++++++++++---------------
> 1 file changed, 10 insertions(+), 15 deletions(-)
>
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 4300d877b824..f72af74094de 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1087,11 +1087,17 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
> if (PageCompound(page) && !cc->alloc_contig) {
> const unsigned int order = compound_order(page);
>
> - if (likely(order <= MAX_ORDER)) {
> - low_pfn += (1UL << order) - 1;
> - nr_scanned += (1UL << order) - 1;
> + /*
> + * Compacting > pageblock_order pages does not improve
> + * memory fragmentation. Also skip hugetlbfs pages.
> + */
> + if (likely(order >= pageblock_order) || PageHuge(page)) {
IMO, if the compound page order is larger than the requested cc->order,
we should also fail the isolation, cause it also does not improve
fragmentation, right?
> + if (order <= MAX_ORDER) {
> + low_pfn += (1UL << order) - 1;
> + nr_scanned += (1UL << order) - 1;
> + }
> + goto isolate_fail;
> }
> - goto isolate_fail;
> }
>
> /*
> @@ -1214,17 +1220,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
> goto isolate_abort;
> }
> }
> -
> - /*
> - * folio become large since the non-locked check,
> - * and it's on LRU.
> - */
> - if (unlikely(folio_test_large(folio) && !cc->alloc_contig)) > - low_pfn += folio_nr_pages(folio) - 1;
> - nr_scanned += folio_nr_pages(folio) - 1;
> - folio_set_lru(folio);
> - goto isolate_fail_put;
> - }
I do not think you can remove this validation, since previous validation
is lockless. So under the lock, we need re-check if the compound page
order is larger than pageblock_order or cc->order, that need fail to
isolate.
> }
>
> /* The folio is taken off the LRU */
On 15 Sep 2023, at 5:41, Baolin Wang wrote:
> On 9/13/2023 12:28 AM, Zi Yan wrote:
>> From: Zi Yan <ziy@nvidia.com>
>>
>> Since compaction code can compact >0 order folios, enable it during the
>> process.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>> mm/compaction.c | 25 ++++++++++---------------
>> 1 file changed, 10 insertions(+), 15 deletions(-)
>>
>> diff --git a/mm/compaction.c b/mm/compaction.c
>> index 4300d877b824..f72af74094de 100644
>> --- a/mm/compaction.c
>> +++ b/mm/compaction.c
>> @@ -1087,11 +1087,17 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
>> if (PageCompound(page) && !cc->alloc_contig) {
>> const unsigned int order = compound_order(page);
>> - if (likely(order <= MAX_ORDER)) {
>> - low_pfn += (1UL << order) - 1;
>> - nr_scanned += (1UL << order) - 1;
>> + /*
>> + * Compacting > pageblock_order pages does not improve
>> + * memory fragmentation. Also skip hugetlbfs pages.
>> + */
>> + if (likely(order >= pageblock_order) || PageHuge(page)) {
>
> IMO, if the compound page order is larger than the requested cc->order, we should also fail the isolation, cause it also does not improve fragmentation, right?
>
Probably yes. I think the reasoning should be since compaction is asking for cc->order,
we should not compacting folios with orders larger than or equal to that, since
cc->order tells us the max free page order is smaller than it, otherwise the
allocation would happen already. I will add this condition in the next version.
>> + if (order <= MAX_ORDER) {
>> + low_pfn += (1UL << order) - 1;
>> + nr_scanned += (1UL << order) - 1;
>> + }
>> + goto isolate_fail;
>> }
>> - goto isolate_fail;
>> }
>> /*
>> @@ -1214,17 +1220,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
>> goto isolate_abort;
>> }
>> }
>> -
>> - /*
>> - * folio become large since the non-locked check,
>> - * and it's on LRU.
>> - */
>> - if (unlikely(folio_test_large(folio) && !cc->alloc_contig)) > - low_pfn += folio_nr_pages(folio) - 1;
>> - nr_scanned += folio_nr_pages(folio) - 1;
>> - folio_set_lru(folio);
>> - goto isolate_fail_put;
>> - }
>
> I do not think you can remove this validation, since previous validation is lockless. So under the lock, we need re-check if the compound page order is larger than pageblock_order or cc->order, that need fail to isolate.
This check should go away, but a new order check for large folios should be
added. Will add it. Thanks.
--
Best Regards,
Yan, Zi
© 2016 - 2025 Red Hat, Inc.