[RFC PATCH] mm/nommu: Implement just enough vmap that compressed erofs can be mounted

Daniel Palmer posted 1 patch 4 days, 5 hours ago
mm/nommu.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 127 insertions(+), 6 deletions(-)
[RFC PATCH] mm/nommu: Implement just enough vmap that compressed erofs can be mounted
Posted by Daniel Palmer 4 days, 5 hours ago
This implements a very poor imitation of vmap that works just
enough that compressed erofs filesystems can be mounted on nommu
machines. Right now compressed erofs filesystems trigger a BUG()
on nommu because vmap() is not implemented there.

This is awful, doesn't work like real vmap, etc., but if you
really cared about stuff working you'd have an MMU I guess?

Signed-off-by: Daniel Palmer <daniel@thingy.jp>
---

Did I miss anything massive that is going to come back and bite me?
Maybe it would have made more sense just to change the erofs
code so on !CONFIG_MMU it doesn't use vmap?

Why:

I'm attempting to get a kernel and userspace into ~3.5MB
of memory without an MMU. The kernel is just a bit over
2MB so I don't have much left.

I've constructed a userspace that is completely made
up of nolibc binaries and with a bit of tweaking and all
the debugging turned off it fits into a ~64KB erofs.

 mm/nommu.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 127 insertions(+), 6 deletions(-)

diff --git a/mm/nommu.c b/mm/nommu.c
index ed3934bc2de4..a7dbb67b3b69 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -19,6 +19,7 @@
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/sched/mm.h>
+#include <linux/hash.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/file.h>
@@ -53,6 +54,18 @@ static struct kmem_cache *vm_region_jar;
 struct rb_root nommu_region_tree = RB_ROOT;
 DECLARE_RWSEM(nommu_region_sem);
 
+/* Tracking for our "poor man's" vmap */
+#define VMAP_HASH_BITS  6
+static struct hlist_head vmap_hash[1 << VMAP_HASH_BITS];
+static DEFINE_SPINLOCK(vmap_lock);
+
+struct nommu_vmap_area {
+	struct hlist_node node;
+	struct page **pages;
+	unsigned int count;
+	void *addr;
+};
+
 const struct vm_operations_struct generic_file_vm_ops = {
 };
 
@@ -305,29 +318,137 @@ void *vmalloc_32_user_noprof(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc_32_user_noprof);
 
-void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
+static bool vmap_needs_bounce(struct page **pages, unsigned int count)
 {
-	BUG();
+	unsigned long pfn = page_to_pfn(pages[0]);
+	unsigned int i;
+
+	for (i = 1; i < count; i++)
+		if (page_to_pfn(pages[i]) != pfn + i)
+			return true;
+
+	return false;
+}
+
+static inline unsigned int vmap_key(const void *addr)
+{
+	return hash_ptr(addr, VMAP_HASH_BITS);
+}
+
+static struct nommu_vmap_area *vmap_area_find(const void *addr)
+{
+	struct nommu_vmap_area *va;
+
+	hlist_for_each_entry(va, &vmap_hash[vmap_key(addr)], node)
+		if (va->addr == addr)
+			return va;
+
 	return NULL;
 }
+
+static void *nommu_vmap_map(struct page **pages, unsigned int count)
+{
+	struct nommu_vmap_area *va __free(kfree) = NULL;
+	struct page **_pages __free(kfree) = NULL;
+	void *copy __free(kvfree) = NULL;
+	unsigned int i;
+
+	va = kmalloc_obj(struct nommu_vmap_area);
+	if (!va)
+		return NULL;
+
+	if (vmap_needs_bounce(pages, count)) {
+		copy = kvmalloc_array(count, PAGE_SIZE, GFP_KERNEL);
+		if (!copy)
+			return NULL;
+
+		_pages = kmemdup(pages, count * sizeof(*pages), GFP_KERNEL);
+		if (!_pages)
+			return NULL;
+
+		/*
+		 * Copy the original contents of the pages into the new
+		 * pages to pretend we virtually mapped them.
+		 */
+		for (i = 0; i < count; i++) {
+			void *p = copy + (i * PAGE_SIZE);
+
+			memcpy(p, page_address(pages[i]), PAGE_SIZE);
+		}
+
+		va->addr = no_free_ptr(copy);
+		va->pages = no_free_ptr(_pages);
+	} else {
+		va->addr = page_address(pages[0]);
+		va->pages = NULL;
+	}
+
+	va->count = count;
+
+	scoped_guard(spinlock, &vmap_lock) {
+		hlist_add_head(&va->node,
+			       &vmap_hash[vmap_key(va->addr)]);
+	}
+
+	return no_free_ptr(va)->addr;
+}
+
+static void nommu_vmap_unmap(const void *addr)
+{
+	struct nommu_vmap_area *va;
+	unsigned int i;
+
+	scoped_guard(spinlock, &vmap_lock) {
+		va = vmap_area_find(addr);
+		if (va)
+			hlist_del(&va->node);
+	}
+
+	if (WARN_ON_ONCE(!va))
+		return;
+
+	if (va->pages) {
+		/*
+		 * Write back the new contents of the pages to
+		 * the original ones, this is a waste of time if
+		 * the pages weren't written to but we can't tell.
+		 */
+		for (i = 0; i < va->count; i++) {
+			const void *src = addr + (i * PAGE_SIZE);
+			void *dst = page_address(va->pages[i]);
+
+			memcpy(dst, src, PAGE_SIZE);
+		}
+
+		kvfree(va->addr);
+		kfree(va->pages);
+	}
+
+	kfree(va);
+}
+
+void *vmap(struct page **pages, unsigned int count,
+	   unsigned long flags, pgprot_t prot)
+{
+	return nommu_vmap_map(pages, count);
+}
 EXPORT_SYMBOL(vmap);
 
 void vunmap(const void *addr)
 {
-	BUG();
+	nommu_vmap_unmap(addr);
 }
 EXPORT_SYMBOL(vunmap);
 
 void *vm_map_ram(struct page **pages, unsigned int count, int node)
 {
-	BUG();
-	return NULL;
+	return nommu_vmap_map(pages, count);
 }
 EXPORT_SYMBOL(vm_map_ram);
 
 void vm_unmap_ram(const void *mem, unsigned int count)
 {
-	BUG();
+	nommu_vmap_unmap(mem);
 }
 EXPORT_SYMBOL(vm_unmap_ram);
 
-- 
2.53.0
Re: [RFC PATCH] mm/nommu: Implement just enough vmap that compressed erofs can be mounted
Posted by Lorenzo Stoakes 4 days, 4 hours ago
For some reason this didn't arrive in my inbox properly... strange? Maybe
missing To:?

On Thu, May 21, 2026 at 01:34:38AM +0900, Daniel Palmer wrote:
> This implements a very poor imitation of vmap that works just
> enough that compressed erofs filesystems can be mounted on nommu
> machines. Right now compressed erofs filesystems trigger a BUG()
> on nommu due to this missing.

Yeah I guess not many people are using the two in combination!

>
> This is awful, doesn't work like real vmap etc,.. but if you
> really cared about stuff working you'd have an MMU I guess?
>
> Signed-off-by: Daniel Palmer <daniel@thingy.jp>

I see you're doing something for fun so I don't want to dissuade you, but it might
take time to get to reviewing something like this given maintainership burden atm!

> ---
>
> Did I miss anything massive that is going to come back and bite me?
> Maybe it would have made more sense just to change the erofs
> code so on !CONFIG_MMU it doesn't use vmap?

I think that'd probably be better honestly. I think faking out vmalloc is more
trouble than it's worth, and it'd probably have to be everything-or-nothing.

Also you'd _probably_ want to implement it in mm/vmalloc.c not in nommu.c. We
have #ifdef CONFIG_MMU .. #else .. #endif blocks for this but... yeah I think
changing erofs would be a better bet honestly.

There are nommu-specific fs hooks you could possibly use that might make life
easier for this!

>
> Why:
>
> I'm attempting to get a kernel and userspace into ~3.5MB
> of memory without an MMU. The kernel is just a bit over
> 2MB so I don't have much left.
>
> I've constructed a userspace that is completely made
> up of nolibc binaries and with a bit of tweaking and all
> the debugging turned off it fits into a ~64KB erofs.

That's cool and I appreciate that you're doing things for fun :) the kernel is
about this also.

But yeah would gently point you towards doing something on the fs end I think
here :)

Cheers, Lorenzo

>
>  mm/nommu.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 127 insertions(+), 6 deletions(-)
>
> diff --git a/mm/nommu.c b/mm/nommu.c
> index ed3934bc2de4..a7dbb67b3b69 100644
> --- a/mm/nommu.c
> +++ b/mm/nommu.c
> @@ -19,6 +19,7 @@
>  #include <linux/export.h>
>  #include <linux/mm.h>
>  #include <linux/sched/mm.h>
> +#include <linux/hash.h>
>  #include <linux/mman.h>
>  #include <linux/swap.h>
>  #include <linux/file.h>
> @@ -53,6 +54,18 @@ static struct kmem_cache *vm_region_jar;
>  struct rb_root nommu_region_tree = RB_ROOT;
>  DECLARE_RWSEM(nommu_region_sem);
>
> +/* Tracking for our "poor man's" vmap */
> +#define VMAP_HASH_BITS  6
> +static struct hlist_head vmap_hash[1 << VMAP_HASH_BITS];
> +static DEFINE_SPINLOCK(vmap_lock);
> +
> +struct nommu_vmap_area {
> +	struct hlist_node node;
> +	struct page **pages;
> +	unsigned int count;
> +	void *addr;
> +};
> +
>  const struct vm_operations_struct generic_file_vm_ops = {
>  };
>
> @@ -305,29 +318,137 @@ void *vmalloc_32_user_noprof(unsigned long size)
>  }
>  EXPORT_SYMBOL(vmalloc_32_user_noprof);
>
> -void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
> +static bool vmap_needs_bounce(struct page **pages, unsigned int count)
>  {
> -	BUG();
> +	unsigned long pfn = page_to_pfn(pages[0]);
> +	unsigned int i;
> +
> +	for (i = 1; i < count; i++)
> +		if (page_to_pfn(pages[i]) != pfn + i)
> +			return true;
> +
> +	return false;
> +}
> +
> +static inline unsigned int vmap_key(const void *addr)
> +{
> +	return hash_ptr(addr, VMAP_HASH_BITS);
> +}
> +
> +static struct nommu_vmap_area *vmap_area_find(const void *addr)
> +{
> +	struct nommu_vmap_area *va;
> +
> +	hlist_for_each_entry(va, &vmap_hash[vmap_key(addr)], node)
> +		if (va->addr == addr)
> +			return va;
> +
>  	return NULL;
>  }
> +
> +static void *nommu_vmap_map(struct page **pages, unsigned int count)
> +{
> +	struct nommu_vmap_area *va __free(kfree) = NULL;
> +	struct page **_pages __free(kfree) = NULL;
> +	void *copy __free(kvfree) = NULL;
> +	unsigned int i;
> +
> +	va = kmalloc_obj(struct nommu_vmap_area);
> +	if (!va)
> +		return NULL;
> +
> +	if (vmap_needs_bounce(pages, count)) {
> +		copy = kvmalloc_array(count, PAGE_SIZE, GFP_KERNEL);
> +		if (!copy)
> +			return NULL;
> +
> +		_pages = kmemdup(pages, count * sizeof(*pages), GFP_KERNEL);
> +		if (!_pages)
> +			return NULL;
> +
> +		/*
> +		 * Copy the original contents of the pages into the new
> +		 * pages to pretend we virtually mapped them.
> +		 */
> +		for (i = 0; i < count; i++) {
> +			void *p = copy + (i * PAGE_SIZE);
> +
> +			memcpy(p, page_address(pages[i]), PAGE_SIZE);
> +		}
> +
> +		va->addr = no_free_ptr(copy);
> +		va->pages = no_free_ptr(_pages);
> +	} else {
> +		va->addr = page_address(pages[0]);
> +		va->pages = NULL;
> +	}
> +
> +	va->count = count;
> +
> +	scoped_guard(spinlock, &vmap_lock) {
> +		hlist_add_head(&va->node,
> +			       &vmap_hash[vmap_key(va->addr)]);
> +	}
> +
> +	return no_free_ptr(va)->addr;
> +}
> +
> +static void nommu_vmap_unmap(const void *addr)
> +{
> +	struct nommu_vmap_area *va;
> +	unsigned int i;
> +
> +	scoped_guard(spinlock, &vmap_lock) {
> +		va = vmap_area_find(addr);
> +		if (va)
> +			hlist_del(&va->node);
> +	}
> +
> +	if (WARN_ON_ONCE(!va))
> +		return;
> +
> +	if (va->pages) {
> +		/*
> +		 * Write back the new contents of the pages to
> +		 * the original ones, this is a waste of time if
> +		 * the pages weren't written to but we can't tell.
> +		 */
> +		for (i = 0; i < va->count; i++) {
> +			const void *src = addr + (i * PAGE_SIZE);
> +			void *dst = page_address(va->pages[i]);
> +
> +			memcpy(dst, src, PAGE_SIZE);
> +		}
> +
> +		kvfree(va->addr);
> +		kfree(va->pages);
> +	}
> +
> +	kfree(va);
> +}
> +
> +void *vmap(struct page **pages, unsigned int count,
> +	   unsigned long flags, pgprot_t prot)
> +{
> +	return nommu_vmap_map(pages, count);
> +}
>  EXPORT_SYMBOL(vmap);
>
>  void vunmap(const void *addr)
>  {
> -	BUG();
> +	nommu_vmap_unmap(addr);
>  }
>  EXPORT_SYMBOL(vunmap);
>
>  void *vm_map_ram(struct page **pages, unsigned int count, int node)
>  {
> -	BUG();
> -	return NULL;
> +	return nommu_vmap_map(pages, count);
>  }
>  EXPORT_SYMBOL(vm_map_ram);
>
>  void vm_unmap_ram(const void *mem, unsigned int count)
>  {
> -	BUG();
> +	nommu_vmap_unmap(mem);
>  }
>  EXPORT_SYMBOL(vm_unmap_ram);
>
> --
> 2.53.0
>
>
>
>
Re: [RFC PATCH] mm/nommu: Implement just enough vmap that compressed erofs can be mounted
Posted by Daniel Palmer 4 days, 4 hours ago
Hi Lorenzo, (and Pedro),

On Thu, 21 May 2026 at 02:05, Lorenzo Stoakes <ljs@kernel.org> wrote:
>
> For some reason this didn't arrive in my inbox properly... strange? Maybe
> missing To:?

Sorry, I CC'd everyone hoping I wouldn't get grilled for the silly patch.

> > Did I miss anything massive that is going to come back and bite me?
> > Maybe it would have made more sense just to change the erofs
> > code so on !CONFIG_MMU it doesn't use vmap?
>
> I think that'd probably be better honestly. I think faking out vmalloc is more
> trouble than it's worth, and it'd probably have to be everything-or-nothing.

That makes sense.
Do you think it'd be acceptable to maybe return NULL from these
functions instead of crashing the kernel?

> But yeah would gently point you towards doing something on the fs end I think
> here :)

Noted. I think I'll at least send them a patch to make compression
support depend on CONFIG_MMU.

Thanks!

Daniel
Re: [RFC PATCH] mm/nommu: Implement just enough vmap that compressed erofs can be mounted
Posted by Pedro Falcato 4 days, 1 hour ago
On Thu, May 21, 2026 at 02:22:25AM +0900, Daniel Palmer wrote:
> Hi Lorenzo, (and Pedro),
> 
> On Thu, 21 May 2026 at 02:05, Lorenzo Stoakes <ljs@kernel.org> wrote:
> >
> > For some reason this didn't arrive in my inbox properly... strange? Maybe
> > missing To:?
> 
> Sorry, I CC'd everyone hoping I wouldn't get grilled for the silly patch.
> 
> > > Did I miss anything massive that is going to come back and bite me?
> > > Maybe it would have made more sense just to change the erofs
> > > code so on !CONFIG_MMU it doesn't use vmap?
> >
> > I think that'd probably be better honestly. I think faking out vmalloc is more
> > trouble than it's worth, and it'd probably have to be everything-or-nothing.
> 
> That makes sense.
> Do you think it'd be acceptable to maybe return NULL from these
> functions instead of crashing the kernel?

I suspect the reason for this is simple: calling these functions in !mmu
configs is a programming mistake, and we want it to blow up loudly instead
of having slightly/very broken behavior.

Maybe in 2026 we'd have picked WARN_ON() + return NULL. But I don't think
it makes too much sense here, honestly.

> 
> > But yeah would gently point you towards doing something on the fs end I think
> > here :)
> 
> Noted. I think I'll at least send them a patch to make compression
> support depend on CONFIG_MMU.

Awesome! (note: since you're space restricted you may actually want that
compression :p)

-- 
Pedro
Re: [RFC PATCH] mm/nommu: Implement just enough vmap that compressed erofs can be mounted
Posted by Pedro Falcato 4 days, 4 hours ago
Hello,

On Thu, May 21, 2026 at 01:34:38AM +0900, Daniel Palmer wrote:
> This implements a very poor imitation of vmap that works just
> enough that compressed erofs filesystems can be mounted on nommu
> machines. Right now compressed erofs filesystems trigger a BUG()
> on nommu due to this missing.
> 
> This is awful, doesn't work like real vmap etc,.. but if you
> really cared about stuff working you'd have an MMU I guess?
> 
> Signed-off-by: Daniel Palmer <daniel@thingy.jp>
> ---
> 
> Did I miss anything massive that is going to come back and bite me?
> Maybe it would have made more sense just to change the erofs
> code so on !CONFIG_MMU it doesn't use vmap?

Yes, I think that would be preferable. I'm sure[1] it's possible to get
EROFS to not use vmap().

I know your patch took some effort but trying to implement this for nommu
is fundamentally not possible. Like your vmap() "bouncing" is completely
fake and users expecting that the pages are actually shared will have a nasty
surprise.


So, yeah, please see if you can find a way forward for erofs :)

[1] I hope.

-- 
Pedro