mm-unstable commit 48a7bd12d57f ("mempolicy: alloc_pages_mpol() for NUMA
policy without vma") ended read_swap_cache_async()'s support for a NULL
vma - okay; but missed the NULL mpol still being passed to
__read_swap_cache_async() by zswap_writeback_entry() - oops!
Since its other callers all give good mpol, add get_task_policy(current)
there in mm/zswap.c, to produce the same good-enough behaviour as before
(and task policy, acted on in current task, does not require the refcount
to be dup'ed).
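(For the record, a simplified sketch of what get_task_policy() amounts to
on CONFIG_NUMA builds - paraphrased from mm/mempolicy.c, omitting its
preferred_node_policy fallback, so details may differ: it hands back the
task's own policy, or a static default, without taking an extra reference.)

	/* Simplified sketch, not the exact mm/mempolicy.c code */
	struct mempolicy *get_task_policy(struct task_struct *p)
	{
		struct mempolicy *pol = p->mempolicy;

		if (pol)
			return pol;		/* task's own policy: no ref taken */
		return &default_policy;		/* static fallback, never freed */
	}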
But if that policy is (quite reasonably) MPOL_INTERLEAVE, then ilx must
be NO_INTERLEAVE_INDEX rather than 0, to provide the same distribution
as before: move that definition from mempolicy.c to mempolicy.h.
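(To illustrate why ilx 0 would skew placement - an illustrative sketch
only, the helper name below is hypothetical and the details are not the
actual mm/mempolicy.c implementation: an explicit index maps
deterministically onto one node of the interleave set, so a constant 0
would pick the same node every time, whereas NO_INTERLEAVE_INDEX falls
back to rotating the task's il_prev across the nodemask.)

	/*
	 * Illustrative sketch only: helper name and details are hypothetical,
	 * not the actual mm/mempolicy.c code.
	 */
	static unsigned int pick_interleave_node(struct mempolicy *pol,
						 unsigned long ilx)
	{
		unsigned int nid;

		if (ilx == NO_INTERLEAVE_INDEX) {
			/* no index given: round-robin via the task's il_prev */
			nid = next_node_in(current->il_prev, pol->nodes);
			current->il_prev = nid;
			return nid;
		}
		/* explicit index: the same ilx always maps to the same node */
		ilx %= nodes_weight(pol->nodes);
		nid = first_node(pol->nodes);
		while (ilx--)
			nid = next_node(nid, pol->nodes);
		return nid;
	}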
Reported-by: Domenico Cerasuolo <mimmocerasuolo@gmail.com>
Closes: https://lore.kernel.org/linux-mm/74e34633-6060-f5e3-aee-7040d43f2e93@google.com/T/#mf08c877d1884fc7867f9e328cdf02257ff3b3ae9
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Fixes: 48a7bd12d57f ("mempolicy: alloc_pages_mpol() for NUMA policy without vma")
Signed-off-by: Hugh Dickins <hughd@google.com>
---
 include/linux/mempolicy.h | 2 ++
 mm/mempolicy.c            | 2 --
 mm/zswap.c                | 7 +++++--
3 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 2801d5b0a4e9..dd9ed2ce7fd5 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -17,6 +17,8 @@
struct mm_struct;
+#define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */
+
#ifdef CONFIG_NUMA
/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 898ee2e3c85b..989293180eb6 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -114,8 +114,6 @@
#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */
#define MPOL_MF_WRLOCK (MPOL_MF_INTERNAL << 2) /* Write-lock walked vmas */
-#define NO_INTERLEAVE_INDEX (-1UL)
-
static struct kmem_cache *policy_cache;
static struct kmem_cache *sn_cache;
diff --git a/mm/zswap.c b/mm/zswap.c
index 37d2b1cb2ecb..060857adca76 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -24,6 +24,7 @@
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
+#include <linux/mempolicy.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>
@@ -1057,6 +1058,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
{
swp_entry_t swpentry = entry->swpentry;
struct page *page;
+ struct mempolicy *mpol;
struct scatterlist input, output;
struct crypto_acomp_ctx *acomp_ctx;
struct zpool *pool = zswap_find_zpool(entry);
@@ -1075,8 +1077,9 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
}
/* try to allocate swap cache page */
- page = __read_swap_cache_async(swpentry, GFP_KERNEL, NULL, 0,
- &page_was_allocated);
+ mpol = get_task_policy(current);
+ page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
+ NO_INTERLEAVE_INDEX, &page_was_allocated);
if (!page) {
ret = -ENOMEM;
goto fail;
--
2.35.3
Hi Hugh,
kernel test robot noticed the following build warnings:
url: https://github.com/intel-lab-lkp/linux/commits/UPDATE-20231024-144517/Hugh-Dickins/hugetlbfs-drop-shared-NUMA-mempolicy-pretence/20231003-173301
base: the 10th patch of https://lore.kernel.org/r/74e34633-6060-f5e3-aee-7040d43f2e93%40google.com
patch link: https://lore.kernel.org/r/00dc4f56-e623-7c85-29ea-4211e93063f6%40google.com
patch subject: [PATCH] mempolicy: alloc_pages_mpol() for NUMA policy without vma: fix
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20231024/202310241551.uY40myKo-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231024/202310241551.uY40myKo-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310241551.uY40myKo-lkp@intel.com/
All warnings (new ones prefixed by >>):
In file included from mm/zswap.c:41:
mm/internal.h: In function 'shrinker_debugfs_name_alloc':
mm/internal.h:1232:9: warning: function 'shrinker_debugfs_name_alloc' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format]
1232 | shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap);
| ^~~~~~~~
mm/zswap.c: In function 'zswap_writeback_entry':
mm/zswap.c:1322:16: error: implicit declaration of function 'get_task_policy'; did you mean 'get_vma_policy'? [-Werror=implicit-function-declaration]
1322 | mpol = get_task_policy(current);
| ^~~~~~~~~~~~~~~
| get_vma_policy
>> mm/zswap.c:1322:14: warning: assignment to 'struct mempolicy *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
1322 | mpol = get_task_policy(current);
| ^
cc1: some warnings being treated as errors
vim +1322 mm/zswap.c
1282
1283 /*********************************
1284 * writeback code
1285 **********************************/
1286 /*
1287 * Attempts to free an entry by adding a page to the swap cache,
1288 * decompressing the entry data into the page, and issuing a
1289 * bio write to write the page back to the swap device.
1290 *
1291 * This can be thought of as a "resumed writeback" of the page
1292 * to the swap device. We are basically resuming the same swap
1293 * writeback path that was intercepted with the zswap_store()
1294 * in the first place. After the page has been decompressed into
1295 * the swap cache, the compressed version stored by zswap can be
1296 * freed.
1297 */
1298 static int zswap_writeback_entry(struct zswap_entry *entry,
1299 struct zswap_tree *tree)
1300 {
1301 swp_entry_t swpentry = entry->swpentry;
1302 struct page *page;
1303 struct mempolicy *mpol;
1304 struct scatterlist input, output;
1305 struct crypto_acomp_ctx *acomp_ctx;
1306 struct zpool *pool = zswap_find_zpool(entry);
1307 bool page_was_allocated;
1308 u8 *src, *tmp = NULL;
1309 unsigned int dlen;
1310 int ret;
1311 struct writeback_control wbc = {
1312 .sync_mode = WB_SYNC_NONE,
1313 };
1314
1315 if (!zpool_can_sleep_mapped(pool)) {
1316 tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
1317 if (!tmp)
1318 return -ENOMEM;
1319 }
1320
1321 /* try to allocate swap cache page */
> 1322 mpol = get_task_policy(current);
1323 page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
1324 NO_INTERLEAVE_INDEX, &page_was_allocated);
1325 if (!page) {
1326 ret = -ENOMEM;
1327 goto fail;
1328 }
1329
1330 /* Found an existing page, we raced with load/swapin */
1331 if (!page_was_allocated) {
1332 put_page(page);
1333 ret = -EEXIST;
1334 goto fail;
1335 }
1336
1337 /*
1338 * Page is locked, and the swapcache is now secured against
1339 * concurrent swapping to and from the slot. Verify that the
1340 * swap entry hasn't been invalidated and recycled behind our
1341 * backs (our zswap_entry reference doesn't prevent that), to
1342 * avoid overwriting a new swap page with old compressed data.
1343 */
1344 spin_lock(&tree->lock);
1345 if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
1346 spin_unlock(&tree->lock);
1347 delete_from_swap_cache(page_folio(page));
1348 ret = -ENOMEM;
1349 goto fail;
1350 }
1351 spin_unlock(&tree->lock);
1352
1353 /* decompress */
1354 acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
1355 dlen = PAGE_SIZE;
1356
1357 src = zpool_map_handle(pool, entry->handle, ZPOOL_MM_RO);
1358 if (!zpool_can_sleep_mapped(pool)) {
1359 memcpy(tmp, src, entry->length);
1360 src = tmp;
1361 zpool_unmap_handle(pool, entry->handle);
1362 }
1363
1364 mutex_lock(acomp_ctx->mutex);
1365 sg_init_one(&input, src, entry->length);
1366 sg_init_table(&output, 1);
1367 sg_set_page(&output, page, PAGE_SIZE, 0);
1368 acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
1369 ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
1370 dlen = acomp_ctx->req->dlen;
1371 mutex_unlock(acomp_ctx->mutex);
1372
1373 if (!zpool_can_sleep_mapped(pool))
1374 kfree(tmp);
1375 else
1376 zpool_unmap_handle(pool, entry->handle);
1377
1378 BUG_ON(ret);
1379 BUG_ON(dlen != PAGE_SIZE);
1380
1381 /* page is up to date */
1382 SetPageUptodate(page);
1383
1384 /* move it to the tail of the inactive list after end_writeback */
1385 SetPageReclaim(page);
1386
1387 /* start writeback */
1388 __swap_writepage(page, &wbc);
1389 put_page(page);
1390 zswap_written_back_pages++;
1391
1392 return ret;
1393
1394 fail:
1395 if (!zpool_can_sleep_mapped(pool))
1396 kfree(tmp);
1397
1398 /*
1399 * If we get here because the page is already in swapcache, a
1400 * load may be happening concurrently. It is safe and okay to
1401 * not free the entry. It is also okay to return !0.
1402 */
1403 return ret;
1404 }
1405
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Tue, 24 Oct 2023, kernel test robot wrote:
> Hi Hugh,
>
> kernel test robot noticed the following build warnings:
> [...]
> mm/zswap.c: In function 'zswap_writeback_entry':
> mm/zswap.c:1322:16: error: implicit declaration of function 'get_task_policy'; did you mean 'get_vma_policy'? [-Werror=implicit-function-declaration]
> 1322 | mpol = get_task_policy(current);
> | ^~~~~~~~~~~~~~~
> | get_vma_policy
> [...]
> cc1: some warnings being treated as errors

Gaah, thanks for that, I never built it without CONFIG_NUMA=y:
v2 patch with a get_task_policy() without CONFIG_NUMA coming up,
built this time.

Hugh
mm-unstable commit 48a7bd12d57f ("mempolicy: alloc_pages_mpol() for NUMA
policy without vma") ended read_swap_cache_async()'s support for a NULL
vma - okay; but missed the NULL mpol still being passed to
__read_swap_cache_async() by zswap_writeback_entry() - oops!
Since its other callers all give good mpol, add get_task_policy(current)
there in mm/zswap.c, to produce the same good-enough behaviour as before
(and task policy, acted on in current task, does not require the refcount
to be dup'ed).
But if that policy is (quite reasonably) MPOL_INTERLEAVE, then ilx must
be NO_INTERLEAVE_INDEX rather than 0, to provide the same distribution
as before: move that definition from mempolicy.c to mempolicy.h.
Reported-by: Domenico Cerasuolo <mimmocerasuolo@gmail.com>
Closes: https://lore.kernel.org/linux-mm/74e34633-6060-f5e3-aee-7040d43f2e93@google.com/T/#mf08c877d1884fc7867f9e328cdf02257ff3b3ae9
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Fixes: 48a7bd12d57f ("mempolicy: alloc_pages_mpol() for NUMA policy without vma")
Signed-off-by: Hugh Dickins <hughd@google.com>
---
v2: !CONFIG_NUMA builds with a get_task_policy() added in mempolicy.h
 include/linux/mempolicy.h | 7 +++++++
 mm/mempolicy.c            | 2 --
 mm/zswap.c                | 7 +++++--
3 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 2801d5b0a4e9..931b118336f4 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -17,6 +17,8 @@
struct mm_struct;
+#define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */
+
#ifdef CONFIG_NUMA
/*
@@ -179,6 +181,11 @@ extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone);
struct mempolicy {};
+static inline struct mempolicy *get_task_policy(struct task_struct *p)
+{
+ return NULL;
+}
+
static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
return true;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 898ee2e3c85b..989293180eb6 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -114,8 +114,6 @@
#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */
#define MPOL_MF_WRLOCK (MPOL_MF_INTERNAL << 2) /* Write-lock walked vmas */
-#define NO_INTERLEAVE_INDEX (-1UL)
-
static struct kmem_cache *policy_cache;
static struct kmem_cache *sn_cache;
diff --git a/mm/zswap.c b/mm/zswap.c
index 37d2b1cb2ecb..060857adca76 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -24,6 +24,7 @@
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
+#include <linux/mempolicy.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>
@@ -1057,6 +1058,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
{
swp_entry_t swpentry = entry->swpentry;
struct page *page;
+ struct mempolicy *mpol;
struct scatterlist input, output;
struct crypto_acomp_ctx *acomp_ctx;
struct zpool *pool = zswap_find_zpool(entry);
@@ -1075,8 +1077,9 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
}
/* try to allocate swap cache page */
- page = __read_swap_cache_async(swpentry, GFP_KERNEL, NULL, 0,
- &page_was_allocated);
+ mpol = get_task_policy(current);
+ page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
+ NO_INTERLEAVE_INDEX, &page_was_allocated);
if (!page) {
ret = -ENOMEM;
goto fail;
--
2.35.3