include/linux/vmalloc.h | 7 ++ mm/vmalloc.c | 163 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+)
In confidential computing environments (arm64 CCA, x86 SEV/TDX), guest
memory is encrypted by default and must be explicitly transitioned to a
decrypted/shared state for host-visible access. Calling
set_memory_decrypted() on a vmalloc address is not supported, and not
recommended as it would be inefficient to decrypt the pages after they
have been mapped.
Add vmalloc_decrypted() and vzalloc_decrypted() which decrypt pages on
the linear map before creating the vmalloc mapping via vmap(), so
physical pages are never mapped with conflicting encryption attributes
across aliases. A new VM_DECRYPTED flag marks these allocations so that
vfree() automatically re-encrypts pages before returning them to the
page allocator.
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/linux-arm-kernel/ZmNJdSxSz-sYpVgI@arm.com/
Signed-off-by: Kameron Carr <kameroncarr@linux.microsoft.com>
---
include/linux/vmalloc.h | 7 ++
mm/vmalloc.c | 163 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 170 insertions(+)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 3b02c0c6b371..d87e1953da55 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -38,6 +38,7 @@ struct iov_iter; /* in uio.h */
#define VM_DEFER_KMEMLEAK 0
#endif
#define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */
+#define VM_DECRYPTED 0x00002000 /* pages decrypted for host-shared access, re-encrypt on vfree */
/* bits [20..32] reserved for arch specific ioremap internals */
@@ -153,6 +154,12 @@ extern void *vmalloc_noprof(unsigned long size) __alloc_size(1);
extern void *vzalloc_noprof(unsigned long size) __alloc_size(1);
#define vzalloc(...) alloc_hooks(vzalloc_noprof(__VA_ARGS__))
+extern void *vmalloc_decrypted_noprof(unsigned long size) __alloc_size(1);
+#define vmalloc_decrypted(...) alloc_hooks(vmalloc_decrypted_noprof(__VA_ARGS__))
+
+extern void *vzalloc_decrypted_noprof(unsigned long size) __alloc_size(1);
+#define vzalloc_decrypted(...) alloc_hooks(vzalloc_decrypted_noprof(__VA_ARGS__))
+
extern void *vmalloc_user_noprof(unsigned long size) __alloc_size(1);
#define vmalloc_user(...) alloc_hooks(vmalloc_user_noprof(__VA_ARGS__))
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index eabb86b13b7e..0e7f0033aa84 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3416,6 +3416,103 @@ void vfree_atomic(const void *addr)
schedule_work(&p->wq);
}
+/*
+ * Transition a single contiguous block of @nr pages at index @idx in
+ * @area->pages to encrypted or decrypted state. On failure, the block's
+ * page-pointer slots are cleared so the standard free path will not return
+ * the pages to the allocator (they are leaked).
+ */
+static int __vm_pages_enc_dec(struct vm_struct *area, unsigned int idx,
+ unsigned int nr, bool encrypt)
+{
+ unsigned long addr =
+ (unsigned long)kasan_reset_tag(page_address(area->pages[idx]));
+ int err = encrypt ? set_memory_encrypted(addr, nr) :
+ set_memory_decrypted(addr, nr);
+
+ if (err)
+ memset(&area->pages[idx], 0, nr * sizeof(*area->pages));
+ return err;
+}
+
+/*
+ * Compact @area->pages, removing slots previously zeroed by
+ * __vm_pages_enc_dec(). Returns the number of leaked pages
+ * (old nr_pages - new nr_pages).
+ */
+static unsigned int vm_compact_leaked_pages(struct vm_struct *area)
+{
+ unsigned int i, dst;
+ unsigned int old_nr = area->nr_pages;
+
+ for (i = 0, dst = 0; i < area->nr_pages; i++) {
+ if (area->pages[i])
+ area->pages[dst++] = area->pages[i];
+ }
+ area->nr_pages = dst;
+ return old_nr - dst;
+}
+
+/*
+ * Re-encrypt the linear-map alias of all pages backing a VM_DECRYPTED area.
+ * Best-effort: on per-block failure the loop continues so as many pages as
+ * possible are returned to the encrypted state. Pages that fail to
+ * transition are left out of area->pages and leaked.
+ */
+static int vm_pages_encrypt(struct vm_struct *area)
+{
+ unsigned int nr = 1U << vm_area_page_order(area);
+ unsigned int i;
+ unsigned int leaked;
+ int ret = 0;
+
+ for (i = 0; i < area->nr_pages; i += nr) {
+ int err = __vm_pages_enc_dec(area, i, nr, true);
+
+ if (err && !ret)
+ ret = err;
+ }
+
+ leaked = vm_compact_leaked_pages(area);
+ if (leaked)
+ pr_warn("vmalloc: re-encryption failed, leaked %u pages\n",
+ leaked);
+ return ret;
+}
+
+/*
+ * Decrypt the linear-map alias of all pages backing a VM_DECRYPTED area.
+ * On failure, the already-decrypted prefix is rolled back to encrypted.
+ * Pages that fail either the initial decrypt or the rollback re-encrypt are
+ * left out of area->pages and leaked.
+ */
+static int vm_pages_decrypt(struct vm_struct *area)
+{
+ unsigned int nr = 1U << vm_area_page_order(area);
+ unsigned int i;
+ unsigned int leaked;
+ int ret = 0;
+
+ for (i = 0; i < area->nr_pages; i += nr) {
+ ret = __vm_pages_enc_dec(area, i, nr, false);
+ if (ret)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ while (i) {
+ i -= nr;
+ __vm_pages_enc_dec(area, i, nr, true);
+ }
+
+ leaked = vm_compact_leaked_pages(area);
+ if (leaked)
+ pr_warn("vmalloc: decryption failed, leaked %u pages\n",
+ leaked);
+ return ret;
+}
+
/**
* vfree - Release memory allocated by vmalloc()
* @addr: Memory base address
@@ -3457,6 +3554,9 @@ void vfree(const void *addr)
return;
}
+ if (unlikely(vm->flags & VM_DECRYPTED))
+ vm_pages_encrypt(vm);
+
if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
vm_reset_perms(vm);
@@ -3895,6 +3995,22 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
goto fail;
}
+ /*
+ * For VM_DECRYPTED areas, decrypt each
+ * page on the linear map before creating the vmalloc alias.
+ */
+ if (area->flags & VM_DECRYPTED) {
+ if (vm_pages_decrypt(area)) {
+ /*
+ * vm_pages_decrypt() re-encrypted what it could;
+ * clear VM_DECRYPTED so the deferred cleanup path
+ * doesn't try to re-encrypt again.
+ */
+ area->flags &= ~VM_DECRYPTED;
+ goto fail;
+ }
+ }
+
/*
* page tables allocations ignore external gfp mask, enforce it
* by the scope API
@@ -4203,6 +4319,50 @@ void *vzalloc_noprof(unsigned long size)
}
EXPORT_SYMBOL(vzalloc_noprof);
+/**
+ * vmalloc_decrypted - allocate virtually contiguous decrypted memory
+ * @size: allocation size
+ *
+ * Allocate pages in decrypted/shared state for host-visible access in
+ * confidential computing environments. Pages are automatically
+ * re-encrypted on vfree().
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+void *vmalloc_decrypted_noprof(unsigned long size)
+{
+ return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL,
+ pgprot_decrypted(PAGE_KERNEL),
+ VM_DECRYPTED, NUMA_NO_NODE,
+ __builtin_return_address(0));
+}
+EXPORT_SYMBOL(vmalloc_decrypted_noprof);
+
+/**
+ * vzalloc_decrypted - allocate zeroed virtually contiguous decrypted memory
+ * @size: allocation size
+ *
+ * Like vmalloc_decrypted(), but the memory is set to zero.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+void *vzalloc_decrypted_noprof(unsigned long size)
+{
+ void *addr;
+
+ addr = __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL,
+ pgprot_decrypted(PAGE_KERNEL),
+ VM_DECRYPTED, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ if (addr)
+ memset(addr, 0, size);
+
+ return addr;
+}
+EXPORT_SYMBOL(vzalloc_decrypted_noprof);
+
/**
* vmalloc_user - allocate zeroed virtually contiguous memory for userspace
* @size: allocation size
@@ -5271,6 +5431,9 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
if (v->flags & VM_DMA_COHERENT)
seq_puts(m, " dma-coherent");
+ if (v->flags & VM_DECRYPTED)
+ seq_puts(m, " decrypted");
+
if (is_vmalloc_addr(v->pages))
seq_puts(m, " vpages");
base-commit: e9add7501ad3297dad9b90ce201266830a68ab47
--
2.45.4
On Thu, May 21, 2026 at 01:58:34PM -0700, Kameron Carr wrote:
> +/*
> + * Transition a single contiguous block of @nr pages at index @idx in
There's no parameter called @nr_pages; you probably meant @nr.
> + * @area->pages to encrypted or decrypted state. On failure, the block's
> + * page-pointer slots are cleared so the standard free path will not return
> + * the pages to the allocator (they are leaked).
> + */
> +static int __vm_pages_enc_dec(struct vm_struct *area, unsigned int idx,
> + unsigned int nr, bool encrypt)
This 'bool encrypt' parameter is an antipattern. Just split this into
two functions.
> +{
> + unsigned long addr =
> + (unsigned long)kasan_reset_tag(page_address(area->pages[idx]));
> + int err = encrypt ? set_memory_encrypted(addr, nr) :
> + set_memory_decrypted(addr, nr);
> +
> + if (err)
> + memset(&area->pages[idx], 0, nr * sizeof(*area->pages));
> + return err;
> +}
Does it really make sense to pass in 'area' and 'idx' rather than
passing in &area->pages[idx]?
© 2016 - 2026 Red Hat, Inc.