From: Kairui Song <kasong@tencent.com>
To prepare for using the swap table as the unified swap layer, introduce
macros and helpers for storing multiple kinds of data in a swap table
entry.
From now on, the swap table stores the folio's PFN instead of the folio
pointer, to make space for extra counting bits (SWAP_COUNT). Shadows are
still stored as they are, since SWAP_COUNT is not used yet.
Also, rename shadow_swp_to_tb to shadow_to_swp_tb. The old name was a
typo and is not really worth a separate fix.
No behaviour change yet, just prepare the API.
Signed-off-by: Kairui Song <kasong@tencent.com>
---
mm/swap_state.c | 6 +--
mm/swap_table.h | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 117 insertions(+), 13 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 6d0eef7470be..e213ee35c1d2 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -148,7 +148,7 @@ void __swap_cache_add_folio(struct swap_cluster_info *ci,
VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio);
VM_WARN_ON_ONCE_FOLIO(!folio_test_swapbacked(folio), folio);
- new_tb = folio_to_swp_tb(folio);
+ new_tb = folio_to_swp_tb(folio, 0);
ci_start = swp_cluster_offset(entry);
ci_off = ci_start;
ci_end = ci_start + nr_pages;
@@ -249,7 +249,7 @@ void __swap_cache_del_folio(struct swap_cluster_info *ci, struct folio *folio,
VM_WARN_ON_ONCE_FOLIO(folio_test_writeback(folio), folio);
si = __swap_entry_to_info(entry);
- new_tb = shadow_swp_to_tb(shadow);
+ new_tb = shadow_to_swp_tb(shadow, 0);
ci_start = swp_cluster_offset(entry);
ci_end = ci_start + nr_pages;
ci_off = ci_start;
@@ -331,7 +331,7 @@ void __swap_cache_replace_folio(struct swap_cluster_info *ci,
VM_WARN_ON_ONCE(!entry.val);
/* Swap cache still stores N entries instead of a high-order entry */
- new_tb = folio_to_swp_tb(new);
+ new_tb = folio_to_swp_tb(new, 0);
do {
old_tb = __swap_table_xchg(ci, ci_off, new_tb);
WARN_ON_ONCE(!swp_tb_is_folio(old_tb) || swp_tb_to_folio(old_tb) != old);
diff --git a/mm/swap_table.h b/mm/swap_table.h
index 10e11d1f3b04..9c4083e4e4f2 100644
--- a/mm/swap_table.h
+++ b/mm/swap_table.h
@@ -12,17 +12,72 @@ struct swap_table {
};
#define SWP_TABLE_USE_PAGE (sizeof(struct swap_table) == PAGE_SIZE)
-#define SWP_TB_COUNT_BITS 4
/*
* A swap table entry represents the status of a swap slot on a swap
* (physical or virtual) device. The swap table in each cluster is a
* 1:1 map of the swap slots in this cluster.
*
- * Each swap table entry could be a pointer (folio), a XA_VALUE
- * (shadow), or NULL.
+ * Swap table entry types and bit layouts:
+ *
+ * NULL: |---------------- 0 ---------------| - Free slot
+ * Shadow: | SWAP_COUNT |---- SHADOW_VAL ---|1| - Swapped out slot
+ * PFN: | SWAP_COUNT |------ PFN -------|10| - Cached slot
+ * Pointer: |----------- Pointer ----------|100| - (Unused)
+ * Bad: |------------- 1 -------------|1000| - Bad slot
+ *
+ * SWAP_COUNT is `SWP_TB_COUNT_BITS` bits long; each entry is an atomic long.
+ *
+ * Usages:
+ *
+ * - NULL: Swap slot is unused, could be allocated.
+ *
+ * - Shadow: Swap slot is used and not cached (usually swapped out). It reuses
+ * the XA_VALUE format to be compatible with working set shadows. SHADOW_VAL
+ * part might be all 0 if the working set shadow info is absent. In such a
+ * case, we still want to keep the shadow format as a placeholder.
+ *
+ * Memcg ID is embedded in SHADOW_VAL.
+ *
+ * - PFN: Swap slot is in use, and cached. Memcg info is recorded on the page
+ * struct.
+ *
+ * - Pointer: Not used yet. `0b100` is reserved for potential pointer usage
+ * because only the lower three bits can be used as a marker for 8-byte
+ * aligned pointers.
+ *
+ * - Bad: Swap slot is reserved, protects swap header or holes on swap devices.
*/
+/* Common SWAP_COUNT part */
+#define SWP_TB_COUNT_BITS 4 /* This can be shrunk or extended if needed */
+#define SWP_TB_COUNT_MASK (~((~0UL) >> SWP_TB_COUNT_BITS))
+#define SWP_TB_COUNT_SHIFT (BITS_PER_LONG - SWP_TB_COUNT_BITS)
+#define SWP_TB_COUNT_MAX ((1 << SWP_TB_COUNT_BITS) - 2)
+
+/* NULL Entry, all 0 */
+#define SWP_TB_NULL 0UL
+
+/* Swapped out: Shadow */
+#define SWP_TB_SHADOW_MARK 0b1UL
+
+/* Cached: PFN */
+#define SWP_TB_PFN_MASK ((~0UL) >> SWP_TB_COUNT_BITS)
+#define SWP_TB_PFN_MARK 0b10UL
+#define SWP_TB_PFN_MARK_BITS 2
+#define SWP_TB_PFN_MARK_MASK (BIT(SWP_TB_PFN_MARK_BITS) - 1)
+
+/* Bad slot: ends with 0b1000 and the rest of the bits are all 1 */
+#define SWP_TB_BAD ((~0UL) << 3)
+
+#if defined(MAX_POSSIBLE_PHYSMEM_BITS)
+#define SWAP_CACHE_PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
+#elif defined(MAX_PHYSMEM_BITS)
+#define SWAP_CACHE_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
+#else
+#define SWAP_CACHE_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT)
+#endif
+
/* Macro for shadow offset calculation */
#define SWAP_COUNT_SHIFT SWP_TB_COUNT_BITS
@@ -35,18 +90,41 @@ static inline unsigned long null_to_swp_tb(void)
return 0;
}
-static inline unsigned long folio_to_swp_tb(struct folio *folio)
+static inline unsigned long __count_to_swp_tb(unsigned char count)
{
+ VM_WARN_ON(count > SWP_TB_COUNT_MAX);
+ return ((unsigned long)count) << SWP_TB_COUNT_SHIFT;
+}
+
+static inline unsigned long pfn_to_swp_tb(unsigned long pfn, unsigned int count)
+{
+ unsigned long swp_tb;
+
BUILD_BUG_ON(sizeof(unsigned long) != sizeof(void *));
- return (unsigned long)folio;
+ BUILD_BUG_ON(SWAP_CACHE_PFN_BITS >
+ (BITS_PER_LONG - SWP_TB_PFN_MARK_BITS - SWP_TB_COUNT_BITS));
+
+ swp_tb = (pfn << SWP_TB_PFN_MARK_BITS) | SWP_TB_PFN_MARK;
+ VM_WARN_ON_ONCE(swp_tb & SWP_TB_COUNT_MASK);
+
+ return swp_tb | __count_to_swp_tb(count);
}
-static inline unsigned long shadow_swp_to_tb(void *shadow)
+static inline unsigned long folio_to_swp_tb(struct folio *folio, unsigned int count)
+{
+ return pfn_to_swp_tb(folio_pfn(folio), count);
+}
+
+static inline unsigned long shadow_to_swp_tb(void *shadow, unsigned int count)
{
BUILD_BUG_ON((BITS_PER_XA_VALUE + 1) !=
BITS_PER_BYTE * sizeof(unsigned long));
+ BUILD_BUG_ON((unsigned long)xa_mk_value(0) != SWP_TB_SHADOW_MARK);
+
VM_WARN_ON_ONCE(shadow && !xa_is_value(shadow));
- return (unsigned long)shadow;
+ VM_WARN_ON_ONCE(shadow && ((unsigned long)shadow & SWP_TB_COUNT_MASK));
+
+ return (unsigned long)shadow | __count_to_swp_tb(count) | SWP_TB_SHADOW_MARK;
}
/*
@@ -59,7 +137,7 @@ static inline bool swp_tb_is_null(unsigned long swp_tb)
static inline bool swp_tb_is_folio(unsigned long swp_tb)
{
- return !xa_is_value((void *)swp_tb) && !swp_tb_is_null(swp_tb);
+ return ((swp_tb & SWP_TB_PFN_MARK_MASK) == SWP_TB_PFN_MARK);
}
static inline bool swp_tb_is_shadow(unsigned long swp_tb)
@@ -67,19 +145,43 @@ static inline bool swp_tb_is_shadow(unsigned long swp_tb)
return xa_is_value((void *)swp_tb);
}
+static inline bool swp_tb_is_bad(unsigned long swp_tb)
+{
+ return swp_tb == SWP_TB_BAD;
+}
+
+static inline bool swp_tb_is_countable(unsigned long swp_tb)
+{
+ return (swp_tb_is_shadow(swp_tb) || swp_tb_is_folio(swp_tb) ||
+ swp_tb_is_null(swp_tb));
+}
+
/*
* Helpers for retrieving info from swap table.
*/
static inline struct folio *swp_tb_to_folio(unsigned long swp_tb)
{
VM_WARN_ON(!swp_tb_is_folio(swp_tb));
- return (void *)swp_tb;
+ return pfn_folio((swp_tb & SWP_TB_PFN_MASK) >> SWP_TB_PFN_MARK_BITS);
}
static inline void *swp_tb_to_shadow(unsigned long swp_tb)
{
VM_WARN_ON(!swp_tb_is_shadow(swp_tb));
- return (void *)swp_tb;
+ return (void *)(swp_tb & ~SWP_TB_COUNT_MASK);
+}
+
+static inline unsigned char __swp_tb_get_count(unsigned long swp_tb)
+{
+ VM_WARN_ON(!swp_tb_is_countable(swp_tb));
+ return ((swp_tb & SWP_TB_COUNT_MASK) >> SWP_TB_COUNT_SHIFT);
+}
+
+static inline int swp_tb_get_count(unsigned long swp_tb)
+{
+ if (swp_tb_is_countable(swp_tb))
+ return __swp_tb_get_count(swp_tb);
+ return -EINVAL;
}
/*
@@ -124,6 +226,8 @@ static inline unsigned long swap_table_get(struct swap_cluster_info *ci,
atomic_long_t *table;
unsigned long swp_tb;
+ VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
+
rcu_read_lock();
table = rcu_dereference(ci->table);
swp_tb = table ? atomic_long_read(&table[off]) : null_to_swp_tb();
--
2.52.0
Hi Kairui,
kernel test robot noticed the following build errors:
[auto build test ERROR on 10de4550639e9df9242e32e9affc90ed75a27c7d]
url: https://github.com/intel-lab-lkp/linux/commits/Kairui-Song/mm-swap-protect-si-swap_file-properly-and-use-as-a-mount-indicator/20260126-020149
base: 10de4550639e9df9242e32e9affc90ed75a27c7d
patch link: https://lore.kernel.org/r/20260126-swap-table-p3-v1-6-a74155fab9b0%40tencent.com
patch subject: [PATCH 06/12] mm, swap: implement helpers for reserving data in the swap table
config: arc-allyesconfig (https://download.01.org/0day-ci/archive/20260126/202601261555.3oBvFZIt-lkp@intel.com/config)
compiler: arc-linux-gcc (GCC) 15.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260126/202601261555.3oBvFZIt-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601261555.3oBvFZIt-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from <command-line>:
In function 'pfn_to_swp_tb',
inlined from 'folio_to_swp_tb' at mm/swap_table.h:115:9,
inlined from '__swap_cache_add_folio' at mm/swap_state.c:151:11:
>> include/linux/compiler_types.h:631:45: error: call to '__compiletime_assert_601' declared with attribute error: BUILD_BUG_ON failed: SWAP_CACHE_PFN_BITS > (BITS_PER_LONG - SWP_TB_PFN_MARK_BITS - SWP_TB_COUNT_BITS)
631 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^
include/linux/compiler_types.h:612:25: note: in definition of macro '__compiletime_assert'
612 | prefix ## suffix(); \
| ^~~~~~
include/linux/compiler_types.h:631:9: note: in expansion of macro '_compiletime_assert'
631 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
| ^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:50:9: note: in expansion of macro 'BUILD_BUG_ON_MSG'
50 | BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
| ^~~~~~~~~~~~~~~~
mm/swap_table.h:104:9: note: in expansion of macro 'BUILD_BUG_ON'
104 | BUILD_BUG_ON(SWAP_CACHE_PFN_BITS >
| ^~~~~~~~~~~~
In function 'pfn_to_swp_tb',
inlined from 'folio_to_swp_tb' at mm/swap_table.h:115:9,
inlined from '__swap_cache_replace_folio' at mm/swap_state.c:334:11:
>> include/linux/compiler_types.h:631:45: error: call to '__compiletime_assert_601' declared with attribute error: BUILD_BUG_ON failed: SWAP_CACHE_PFN_BITS > (BITS_PER_LONG - SWP_TB_PFN_MARK_BITS - SWP_TB_COUNT_BITS)
631 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^
include/linux/compiler_types.h:612:25: note: in definition of macro '__compiletime_assert'
612 | prefix ## suffix(); \
| ^~~~~~
include/linux/compiler_types.h:631:9: note: in expansion of macro '_compiletime_assert'
631 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
| ^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:50:9: note: in expansion of macro 'BUILD_BUG_ON_MSG'
50 | BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
| ^~~~~~~~~~~~~~~~
mm/swap_table.h:104:9: note: in expansion of macro 'BUILD_BUG_ON'
104 | BUILD_BUG_ON(SWAP_CACHE_PFN_BITS >
| ^~~~~~~~~~~~
vim +/__compiletime_assert_601 +631 include/linux/compiler_types.h
eb5c2d4b45e3d2d Will Deacon 2020-07-21 617
eb5c2d4b45e3d2d Will Deacon 2020-07-21 618 #define _compiletime_assert(condition, msg, prefix, suffix) \
eb5c2d4b45e3d2d Will Deacon 2020-07-21 619 __compiletime_assert(condition, msg, prefix, suffix)
eb5c2d4b45e3d2d Will Deacon 2020-07-21 620
eb5c2d4b45e3d2d Will Deacon 2020-07-21 621 /**
eb5c2d4b45e3d2d Will Deacon 2020-07-21 622 * compiletime_assert - break build and emit msg if condition is false
eb5c2d4b45e3d2d Will Deacon 2020-07-21 623 * @condition: a compile-time constant condition to check
eb5c2d4b45e3d2d Will Deacon 2020-07-21 624 * @msg: a message to emit if condition is false
eb5c2d4b45e3d2d Will Deacon 2020-07-21 625 *
eb5c2d4b45e3d2d Will Deacon 2020-07-21 626 * In tradition of POSIX assert, this macro will break the build if the
eb5c2d4b45e3d2d Will Deacon 2020-07-21 627 * supplied condition is *false*, emitting the supplied error message if the
eb5c2d4b45e3d2d Will Deacon 2020-07-21 628 * compiler has support to do so.
eb5c2d4b45e3d2d Will Deacon 2020-07-21 629 */
eb5c2d4b45e3d2d Will Deacon 2020-07-21 630 #define compiletime_assert(condition, msg) \
eb5c2d4b45e3d2d Will Deacon 2020-07-21 @631 _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
eb5c2d4b45e3d2d Will Deacon 2020-07-21 632
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Mon, Jan 26, 2026 at 3:16 PM kernel test robot <lkp@intel.com> wrote: > > Hi Kairui, > > kernel test robot noticed the following build errors: > > [auto build test ERROR on 10de4550639e9df9242e32e9affc90ed75a27c7d] > > url: https://github.com/intel-lab-lkp/linux/commits/Kairui-Song/mm-swap-protect-si-swap_file-properly-and-use-as-a-mount-indicator/20260126-020149 > base: 10de4550639e9df9242e32e9affc90ed75a27c7d > patch link: https://lore.kernel.org/r/20260126-swap-table-p3-v1-6-a74155fab9b0%40tencent.com > patch subject: [PATCH 06/12] mm, swap: implement helpers for reserving data in the swap table > config: arc-allyesconfig (https://download.01.org/0day-ci/archive/20260126/202601261555.3oBvFZIt-lkp@intel.com/config) > compiler: arc-linux-gcc (GCC) 15.2.0 > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260126/202601261555.3oBvFZIt-lkp@intel.com/reproduce) > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <lkp@intel.com> > | Closes: https://lore.kernel.org/oe-kbuild-all/202601261555.3oBvFZIt-lkp@intel.com/ > > All errors (new ones prefixed by >>): > > In file included from <command-line>: > In function 'pfn_to_swp_tb', > inlined from 'folio_to_swp_tb' at mm/swap_table.h:115:9, > inlined from '__swap_cache_add_folio' at mm/swap_state.c:151:11: > >> include/linux/compiler_types.h:631:45: error: call to '__compiletime_assert_601' declared with attribute error: BUILD_BUG_ON failed: SWAP_CACHE_PFN_BITS > (BITS_PER_LONG - SWP_TB_PFN_MARK_BITS - SWP_TB_COUNT_BITS)
So IIUC ARC may have MAX_POSSIBLE_PHYSMEM_BITS == 40 with PAGE_SHIFT == 13 and BITS_PER_LONG == 32. That makes SWAP_CACHE_PFN_BITS == 27, which leaves only 32 - 27 - 2 = 3 bits for the swap count. So SWP_TB_COUNT_BITS has to be shrunk to 3 on that arch (currently SWP_TB_COUNT_BITS == 4). That is acceptable: a swap count value beyond (1 << 3) is still offloaded to the extended table just fine, and that's the unlikely case.
I'll update the macro definitions to let SWP_TB_COUNT_BITS shrink automatically when there are not enough bits, trigger a build error if SWP_TB_COUNT_BITS <= 1, and test the ARC build.
© 2016 - 2026 Red Hat, Inc.