Here is an example of how you might use an SPM memory node.
If compressed RAM is available (in this case, a bit is set in
mt_spm_nodelist), we skip the entire software compression process
and memcpy directly to a compressed memory folio, storing the newly
allocated compressed memory page as the zswap entry->handle.
On decompress we do the opposite: copy directly from the stored
page to the destination, and free the compressed memory page.
Note: We do not integrate any compressed memory device checks at
this point because this is a stand-in to demonstrate how the SPM
node allocation mechanism works.
See the "TODO" comment in `zswap_compress_direct()` for more details.
In reality, we would want to move this mechanism out of zswap into
its own component (cram.c?), and enable a more direct migrate_page()
call that actually re-maps the page read-only into any mappings, and
then provides a write-fault handler which promotes the page on write.
(Similar to a NUMA Hint Fault, but only on write-access)
This prevents any run-away compression ratio failures, since the
compression ratio would be checked on allocation, rather than allowed
to silently decrease on writes until the device becomes unstable.
Signed-off-by: Gregory Price <gourry@gourry.net>
---
mm/zswap.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 65 insertions(+), 1 deletion(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index c1af782e54ec..e6f48a4e90f1 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -25,6 +25,7 @@
#include <linux/scatterlist.h>
#include <linux/mempolicy.h>
#include <linux/mempool.h>
+#include <linux/memory-tiers.h>
#include <crypto/acompress.h>
#include <linux/zswap.h>
#include <linux/mm_types.h>
@@ -191,6 +192,7 @@ struct zswap_entry {
swp_entry_t swpentry;
unsigned int length;
bool referenced;
+ bool direct;
struct zswap_pool *pool;
unsigned long handle;
struct obj_cgroup *objcg;
@@ -717,7 +719,8 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
static void zswap_entry_free(struct zswap_entry *entry)
{
zswap_lru_del(&zswap_list_lru, entry);
- zs_free(entry->pool->zs_pool, entry->handle);
+ if (!entry->direct)
+ zs_free(entry->pool->zs_pool, entry->handle);
zswap_pool_put(entry->pool);
if (entry->objcg) {
obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
@@ -851,6 +854,43 @@ static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
mutex_unlock(&acomp_ctx->mutex);
}
+/*
+ * Copy @src into a page allocated from the nodes in mt_spm_nodelist.
+ * Returns the new page, or NULL if the mask is empty, the allocation
+ * fails, or copy_mc_highpage() reports an error. @entry is unused.
+ */
+static struct page *zswap_compress_direct(struct page *src,
+					  struct zswap_entry *entry)
+{
+	int nid = first_node(mt_spm_nodelist);
+	struct page *dst;
+	gfp_t gfp;
+
+	/* first_node() returns MAX_NUMNODES, not NUMA_NO_NODE, if empty */
+	if (nid >= MAX_NUMNODES)
+		return NULL;
+
+	gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE |
+	      __GFP_SPM_NODE;
+	dst = __alloc_pages(gfp, 0, nid, &mt_spm_nodelist);
+	if (!dst)
+		return NULL;
+
+	/*
+	 * TODO: check that the page is safe to use. A real implementation
+	 * (its own component, cram.c?, not zswap) would ask the device via
+	 * a callback, or test a flag set by a watermark interrupt, and if
+	 * unsafe free the page unwritten and kick kswapd for that node.
+	 * Testing with normal memory here, the page is always safe to use.
+	 */
+	if (copy_mc_highpage(dst, src)) {
+		/* copy failed: do not hand back a partially copied page */
+		__free_page(dst);
+		return NULL;
+	}
+	return dst;
+}
+
static bool zswap_compress(struct page *page, struct zswap_entry *entry,
struct zswap_pool *pool)
{
@@ -862,6 +902,19 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
gfp_t gfp;
u8 *dst;
bool mapped = false;
+ struct page *zpage;
+
+ /* Try to shunt directly to compressed ram */
+ if (!nodes_empty(mt_spm_nodelist)) {
+ zpage = zswap_compress_direct(page, entry);
+ if (zpage) {
+ entry->handle = (unsigned long)zpage;
+ entry->length = PAGE_SIZE;
+ entry->direct = true;
+ return true;
+ }
+ /* otherwise fallback to normal zswap */
+ }
acomp_ctx = acomp_ctx_get_cpu_lock(pool);
dst = acomp_ctx->buffer;
@@ -939,6 +992,16 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
int decomp_ret = 0, dlen = PAGE_SIZE;
u8 *src, *obj;
+ /* compressed ram page */
+ if (entry->direct) {
+ struct page *src = (struct page *)entry->handle;
+ struct folio *zfolio = page_folio(src);
+
+ memcpy_folio(folio, 0, zfolio, 0, PAGE_SIZE);
+ __free_page(src);
+ goto direct_done;
+ }
+
acomp_ctx = acomp_ctx_get_cpu_lock(pool);
obj = zs_obj_read_begin(pool->zs_pool, entry->handle, acomp_ctx->buffer);
@@ -972,6 +1035,7 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
zs_obj_read_end(pool->zs_pool, entry->handle, obj);
acomp_ctx_put_unlock(acomp_ctx);
+direct_done:
if (!decomp_ret && dlen == PAGE_SIZE)
return true;
--
2.51.1