Implement p2m_set_allocation() to construct the p2m pages pool for
guests based on the required number of pages.
This is implemented by:
- Adding a `struct paging_domain`, which contains a freelist, a
  counter and a spinlock, to `struct arch_domain` to track the free
  p2m pages and the total number of pages in the p2m pages pool.
- Adding a helper `p2m_set_allocation()` to set the p2m pages pool
  size. This helper should be called before allocating memory for
  a guest and is called from domain_p2m_set_allocation(), which is
  part of the common dom0less code (a rough sketch of that caller
  is shown below).
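
For reference, a minimal sketch of how that caller is expected to drive
the helper. This is an assumption modelled on the Arm counterpart; the
sizing helper domain_p2m_pages() and the exact preemption handling are
not part of this patch:

/* Hypothetical caller, modelled on Arm's dom0less code. */
static void __init domain_p2m_set_allocation(struct domain *d, uint64_t mem)
{
    unsigned long p2m_pages = domain_p2m_pages(mem, d->max_vcpus);
    bool preempted;

    spin_lock(&d->arch.paging.lock);
    do
    {
        preempted = false;
        /* Return value handling omitted for brevity. */
        p2m_set_allocation(d, p2m_pages, &preempted);
        /* On preemption, flush pending softirqs and retry. */
        process_pending_softirqs();
    } while ( preempted );
    spin_unlock(&d->arch.paging.lock);
}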
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in v2:
 - Drop the comment above inclusion of <xen/event.h> in riscv/p2m.c.
 - Use ACCESS_ONCE() for the lhs and rhs of the expressions in
   p2m_set_allocation().
---
 xen/arch/riscv/include/asm/domain.h | 12 ++++++
 xen/arch/riscv/p2m.c                | 59 +++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
diff --git a/xen/arch/riscv/include/asm/domain.h b/xen/arch/riscv/include/asm/domain.h
index b9a03e91c5..b818127f9f 100644
--- a/xen/arch/riscv/include/asm/domain.h
+++ b/xen/arch/riscv/include/asm/domain.h
@@ -2,6 +2,8 @@
 #ifndef ASM__RISCV__DOMAIN_H
 #define ASM__RISCV__DOMAIN_H
 
+#include <xen/mm.h>
+#include <xen/spinlock.h>
 #include <xen/xmalloc.h>
 #include <public/hvm/params.h>
 
@@ -18,10 +20,20 @@ struct arch_vcpu_io {
 struct arch_vcpu {
 };
 
+struct paging_domain {
+    spinlock_t lock;
+    /* Free P2M pages from the pre-allocated P2M pool */
+    struct page_list_head p2m_freelist;
+    /* Number of pages from the pre-allocated P2M pool */
+    unsigned long p2m_total_pages;
+};
+
 struct arch_domain {
     struct hvm_domain hvm;
 
     struct p2m_domain p2m;
+
+    struct paging_domain paging;
 };
 
 #include <xen/sched.h>
diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c
index 9f7fd8290a..f33c7147ff 100644
--- a/xen/arch/riscv/p2m.c
+++ b/xen/arch/riscv/p2m.c
@@ -1,4 +1,5 @@
 #include <xen/bitops.h>
+#include <xen/event.h>
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <xen/spinlock.h>
@@ -105,6 +106,9 @@ int p2m_init(struct domain *d)
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     int rc;
 
+    spin_lock_init(&d->arch.paging.lock);
+    INIT_PAGE_LIST_HEAD(&d->arch.paging.p2m_freelist);
+
     p2m->vmid = INVALID_VMID;
 
     rc = p2m_alloc_vmid(d);
@@ -113,3 +117,58 @@ int p2m_init(struct domain *d)
 
     return 0;
 }
+
+/*
+ * Set the pool of pages to the required number of pages.
+ * Returns 0 for success, non-zero for failure.
+ * Call with d->arch.paging.lock held.
+ */
+int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
+{
+    struct page_info *pg;
+
+    ASSERT(spin_is_locked(&d->arch.paging.lock));
+
+    for ( ; ; )
+    {
+        if ( d->arch.paging.p2m_total_pages < pages )
+        {
+            /* Need to allocate more memory from domheap */
+            pg = alloc_domheap_page(d, MEMF_no_owner);
+            if ( pg == NULL )
+            {
+                printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
+                return -ENOMEM;
+            }
+            ACCESS_ONCE(d->arch.paging.p2m_total_pages)++;
+            page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
+        }
+        else if ( d->arch.paging.p2m_total_pages > pages )
+        {
+            /* Need to return memory to domheap */
+            pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
+            if( pg )
+            {
+                ACCESS_ONCE(d->arch.paging.p2m_total_pages)--;
+                free_domheap_page(pg);
+            }
+            else
+            {
+                printk(XENLOG_ERR
+                       "Failed to free P2M pages, P2M freelist is empty.\n");
+                return -ENOMEM;
+            }
+        }
+        else
+            break;
+
+        /* Check to see if we need to yield and try again */
+        if ( preempted && general_preempt_check() )
+        {
+            *preempted = true;
+            return -ERESTART;
+        }
+    }
+
+    return 0;
+}
-- 
2.49.0

On 10.06.2025 15:05, Oleksii Kurochko wrote:
> @@ -113,3 +117,58 @@ int p2m_init(struct domain *d)
>  
>      return 0;
>  }
> +
> +/*
> + * Set the pool of pages to the required number of pages.
> + * Returns 0 for success, non-zero for failure.
> + * Call with d->arch.paging.lock held.
> + */
> +int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
> +{
> +    struct page_info *pg;
> +
> +    ASSERT(spin_is_locked(&d->arch.paging.lock));
> +
> +    for ( ; ; )
> +    {
> +        if ( d->arch.paging.p2m_total_pages < pages )
> +        {
> +            /* Need to allocate more memory from domheap */
> +            pg = alloc_domheap_page(d, MEMF_no_owner);
> +            if ( pg == NULL )
> +            {
> +                printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
> +                return -ENOMEM;
> +            }
> +            ACCESS_ONCE(d->arch.paging.p2m_total_pages)++;
> +            page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
> +        }
> +        else if ( d->arch.paging.p2m_total_pages > pages )
> +        {
> +            /* Need to return memory to domheap */
> +            pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
> +            if( pg )
> +            {
> +                ACCESS_ONCE(d->arch.paging.p2m_total_pages)--;
> +                free_domheap_page(pg);
> +            }
> +            else
> +            {
> +                printk(XENLOG_ERR
> +                       "Failed to free P2M pages, P2M freelist is empty.\n");
> +                return -ENOMEM;
> +            }
> +        }
> +        else
> +            break;
> +
> +        /* Check to see if we need to yield and try again */
> +        if ( preempted && general_preempt_check() )
> +        {
> +            *preempted = true;
> +            return -ERESTART;
> +        }
> +    }
> +
> +    return 0;
> +}
Btw, with the order-2 requirement for the root page table, you may want to
consider an alternative approach: Here you could allocate some order-2
pages (possibly up to as many as a domain might need, which right now
would be exactly one), put them on a separate list, and consume the root
table(s) from there. If you run out of pages on the order-0 list, you
could shatter a page from the order-2 one (as long as that's still non-
empty). The difficulty would be with freeing, where a previously shattered
order-2 page would be nice to re-combine once all of its constituents are
free again. The main benefit would be avoiding the back and forth in patch
6.
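
A purely illustrative sketch of the allocation side of that scheme; the
two freelist fields and the helper name below are assumptions, not part
of the patch under review:

static struct page_info *p2m_alloc_from_pool(struct domain *d)
{
    struct page_info *pg =
        page_list_remove_head(&d->arch.paging.p2m_freelist_4k);

    if ( !pg )
    {
        /* Order-0 list is empty: shatter an order-2 page, if one is left. */
        struct page_info *big =
            page_list_remove_head(&d->arch.paging.p2m_freelist_order2);
        unsigned int i;

        if ( !big )
            return NULL;

        for ( i = 0; i < (1U << 2); i++ )
            page_list_add_tail(big + i, &d->arch.paging.p2m_freelist_4k);

        pg = page_list_remove_head(&d->arch.paging.p2m_freelist_4k);
    }

    return pg;
}

Root tables would be taken straight from the order-2 list; re-combining
on free would additionally require remembering which order-0 pages came
from the same order-2 allocation.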
Jan
                
            
On 7/1/25 3:04 PM, Jan Beulich wrote:
> On 10.06.2025 15:05, Oleksii Kurochko wrote:
>> @@ -113,3 +117,58 @@ int p2m_init(struct domain *d)
>>   
>>       return 0;
>>   }
>> +
>> +/*
>> + * Set the pool of pages to the required number of pages.
>> + * Returns 0 for success, non-zero for failure.
>> + * Call with d->arch.paging.lock held.
>> + */
>> +int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
>> +{
>> +    struct page_info *pg;
>> +
>> +    ASSERT(spin_is_locked(&d->arch.paging.lock));
>> +
>> +    for ( ; ; )
>> +    {
>> +        if ( d->arch.paging.p2m_total_pages < pages )
>> +        {
>> +            /* Need to allocate more memory from domheap */
>> +            pg = alloc_domheap_page(d, MEMF_no_owner);
>> +            if ( pg == NULL )
>> +            {
>> +                printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
>> +                return -ENOMEM;
>> +            }
>> +            ACCESS_ONCE(d->arch.paging.p2m_total_pages)++;
>> +            page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
>> +        }
>> +        else if ( d->arch.paging.p2m_total_pages > pages )
>> +        {
>> +            /* Need to return memory to domheap */
>> +            pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
>> +            if( pg )
>> +            {
>> +                ACCESS_ONCE(d->arch.paging.p2m_total_pages)--;
>> +                free_domheap_page(pg);
>> +            }
>> +            else
>> +            {
>> +                printk(XENLOG_ERR
>> +                       "Failed to free P2M pages, P2M freelist is empty.\n");
>> +                return -ENOMEM;
>> +            }
>> +        }
>> +        else
>> +            break;
>> +
>> +        /* Check to see if we need to yield and try again */
>> +        if ( preempted && general_preempt_check() )
>> +        {
>> +            *preempted = true;
>> +            return -ERESTART;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
> Btw, with the order-2 requirement for the root page table, you may want to
> consider an alternative approach: Here you could allocate some order-2
> pages (possibly up to as many as a domain might need, which right now
> would be exactly one), put them on a separate list, and consume the root
> table(s) from there. If you run out of pages on the order-0 list, you
> could shatter a page from the order-2 one (as long as that's still non-
> empty). The difficulty would be with freeing, where a previously shattered
> order-2 page would be nice to re-combine once all of its constituents are
> free again.
Do we really need to re-combine shattered order-2 pages?
It seems like the only use for this order-2 list is to provide one order-2 page
for the root page table. All other pages are 4k pages, so even if we don't
re-combine them, nothing serious will happen.
And if we aren't going to have more uses for the order-2 page list, do we
really need a separate order-2 list basically just for the root page table?
...
>   The main benefit would be avoiding the back and forth in patch
> 6.
...
Can't we just avoid putting the pages for the root page table (which will be
handed back anyway) into the freelist at all? That way, there would be no need
to return them later. Something like:
int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
{
    struct page_info *pg;

    ASSERT(spin_is_locked(&d->arch.paging.lock));

    pages -= root_page_table_num;

    for ( ; ; )
    {
        if ( d->arch.paging.p2m_total_pages < pages )
        {
    ...
}
~ Oleksii

On 02.07.2025 13:48, Oleksii Kurochko wrote:
> On 7/1/25 3:04 PM, Jan Beulich wrote:
>> On 10.06.2025 15:05, Oleksii Kurochko wrote:
>>> @@ -113,3 +117,58 @@ int p2m_init(struct domain *d)
>>>   
>>>       return 0;
>>>   }
>>> +
>>> +/*
>>> + * Set the pool of pages to the required number of pages.
>>> + * Returns 0 for success, non-zero for failure.
>>> + * Call with d->arch.paging.lock held.
>>> + */
>>> +int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
>>> +{
>>> +    struct page_info *pg;
>>> +
>>> +    ASSERT(spin_is_locked(&d->arch.paging.lock));
>>> +
>>> +    for ( ; ; )
>>> +    {
>>> +        if ( d->arch.paging.p2m_total_pages < pages )
>>> +        {
>>> +            /* Need to allocate more memory from domheap */
>>> +            pg = alloc_domheap_page(d, MEMF_no_owner);
>>> +            if ( pg == NULL )
>>> +            {
>>> +                printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
>>> +                return -ENOMEM;
>>> +            }
>>> +            ACCESS_ONCE(d->arch.paging.p2m_total_pages)++;
>>> +            page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
>>> +        }
>>> +        else if ( d->arch.paging.p2m_total_pages > pages )
>>> +        {
>>> +            /* Need to return memory to domheap */
>>> +            pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
>>> +            if( pg )
>>> +            {
>>> +                ACCESS_ONCE(d->arch.paging.p2m_total_pages)--;
>>> +                free_domheap_page(pg);
>>> +            }
>>> +            else
>>> +            {
>>> +                printk(XENLOG_ERR
>>> +                       "Failed to free P2M pages, P2M freelist is empty.\n");
>>> +                return -ENOMEM;
>>> +            }
>>> +        }
>>> +        else
>>> +            break;
>>> +
>>> +        /* Check to see if we need to yield and try again */
>>> +        if ( preempted && general_preempt_check() )
>>> +        {
>>> +            *preempted = true;
>>> +            return -ERESTART;
>>> +        }
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>> Btw, with the order-2 requirement for the root page table, you may want to
>> consider an alternative approach: Here you could allocate some order-2
>> pages (possibly up to as many as a domain might need, which right now
>> would be exactly one), put them on a separate list, and consume the root
>> table(s) from there. If you run out of pages on the order-0 list, you
>> could shatter a page from the order-2 one (as long as that's still non-
>> empty). The difficulty would be with freeing, where a previously shattered
>> order-2 page would be nice to re-combine once all of its constituents are
>> free again.
> 
> Do we really need to re-combine shattered order-2 pages?
> It seems like the only usage for this order-2-list is to have 1 order-2 page
> for root page table. All other pages are 4k pages so even if we won't re-combine
> them, nothing serious will happen.
That's true as long as you have only the host-P2M for each domain. Once you
have alternative or nested ones, things may change (unless they all have
their roots also set up right during domain creation, which would seem
wasteful to me).
>>   The main benefit would be avoiding the back and forth in patch
>> 6.
> 
> ...
> Can’t we just avoid putting the pages (which will get back) for the root page table into the
> freelist at all?
Again, this may be fine as long as there's only the host-P2M. That sole root
won't ever be freed anyway during the lifetime of a domain.
Jan
                
            
On 7/2/25 1:56 PM, Jan Beulich wrote:
> On 02.07.2025 13:48, Oleksii Kurochko wrote:
>> On 7/1/25 3:04 PM, Jan Beulich wrote:
>>> On 10.06.2025 15:05, Oleksii Kurochko wrote:
>>>> @@ -113,3 +117,58 @@ int p2m_init(struct domain *d)
>>>>    
>>>>        return 0;
>>>>    }
>>>> +
>>>> +/*
>>>> + * Set the pool of pages to the required number of pages.
>>>> + * Returns 0 for success, non-zero for failure.
>>>> + * Call with d->arch.paging.lock held.
>>>> + */
>>>> +int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
>>>> +{
>>>> +    struct page_info *pg;
>>>> +
>>>> +    ASSERT(spin_is_locked(&d->arch.paging.lock));
>>>> +
>>>> +    for ( ; ; )
>>>> +    {
>>>> +        if ( d->arch.paging.p2m_total_pages < pages )
>>>> +        {
>>>> +            /* Need to allocate more memory from domheap */
>>>> +            pg = alloc_domheap_page(d, MEMF_no_owner);
>>>> +            if ( pg == NULL )
>>>> +            {
>>>> +                printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
>>>> +                return -ENOMEM;
>>>> +            }
>>>> +            ACCESS_ONCE(d->arch.paging.p2m_total_pages)++;
>>>> +            page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
>>>> +        }
>>>> +        else if ( d->arch.paging.p2m_total_pages > pages )
>>>> +        {
>>>> +            /* Need to return memory to domheap */
>>>> +            pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
>>>> +            if( pg )
>>>> +            {
>>>> +                ACCESS_ONCE(d->arch.paging.p2m_total_pages)--;
>>>> +                free_domheap_page(pg);
>>>> +            }
>>>> +            else
>>>> +            {
>>>> +                printk(XENLOG_ERR
>>>> +                       "Failed to free P2M pages, P2M freelist is empty.\n");
>>>> +                return -ENOMEM;
>>>> +            }
>>>> +        }
>>>> +        else
>>>> +            break;
>>>> +
>>>> +        /* Check to see if we need to yield and try again */
>>>> +        if ( preempted && general_preempt_check() )
>>>> +        {
>>>> +            *preempted = true;
>>>> +            return -ERESTART;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>> Btw, with the order-2 requirement for the root page table, you may want to
>>> consider an alternative approach: Here you could allocate some order-2
>>> pages (possibly up to as many as a domain might need, which right now
>>> would be exactly one), put them on a separate list, and consume the root
>>> table(s) from there. If you run out of pages on the order-0 list, you
>>> could shatter a page from the order-2 one (as long as that's still non-
>>> empty). The difficulty would be with freeing, where a previously shattered
>>> order-2 page would be nice to re-combine once all of its constituents are
>>> free again.
>> Do we really need to re-combine shattered order-2 pages?
>> It seems like the only usage for this order-2-list is to have 1 order-2 page
>> for root page table. All other pages are 4k pages so even if we won't re-combine
>> them, nothing serious will happen.
> That's true as long as you have only the host-P2M for each domain. Once you
> have alternative or nested ones, things may change (unless they all have
> their roots also set up right during domain creation, which would seem
> wasteful to me).
I don't know how it is implemented on x86, but I thought that if alternative
or nested P2Ms are needed, then page tables separate from the host-P2M ones
(including the root page table) have to be provided.
~ Oleksii
>
>>>    The main benefit would be avoiding the back and forth in patch
>>> 6.
>> ...
>> Can’t we just avoid putting the pages (which will get back) for the root page table into the
>> freelist at all?
> Again, this may be fine as long as there's only the host-P2M. That sole root
> won't ever be freed anyway during the lifetime of a domain.
>
> Jan

On 02.07.2025 14:34, Oleksii Kurochko wrote:
> 
> On 7/2/25 1:56 PM, Jan Beulich wrote:
>> On 02.07.2025 13:48, Oleksii Kurochko wrote:
>>> On 7/1/25 3:04 PM, Jan Beulich wrote:
>>>> On 10.06.2025 15:05, Oleksii Kurochko wrote:
>>>>> @@ -113,3 +117,58 @@ int p2m_init(struct domain *d)
>>>>>    
>>>>>        return 0;
>>>>>    }
>>>>> +
>>>>> +/*
>>>>> + * Set the pool of pages to the required number of pages.
>>>>> + * Returns 0 for success, non-zero for failure.
>>>>> + * Call with d->arch.paging.lock held.
>>>>> + */
>>>>> +int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
>>>>> +{
>>>>> +    struct page_info *pg;
>>>>> +
>>>>> +    ASSERT(spin_is_locked(&d->arch.paging.lock));
>>>>> +
>>>>> +    for ( ; ; )
>>>>> +    {
>>>>> +        if ( d->arch.paging.p2m_total_pages < pages )
>>>>> +        {
>>>>> +            /* Need to allocate more memory from domheap */
>>>>> +            pg = alloc_domheap_page(d, MEMF_no_owner);
>>>>> +            if ( pg == NULL )
>>>>> +            {
>>>>> +                printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
>>>>> +                return -ENOMEM;
>>>>> +            }
>>>>> +            ACCESS_ONCE(d->arch.paging.p2m_total_pages)++;
>>>>> +            page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
>>>>> +        }
>>>>> +        else if ( d->arch.paging.p2m_total_pages > pages )
>>>>> +        {
>>>>> +            /* Need to return memory to domheap */
>>>>> +            pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
>>>>> +            if( pg )
>>>>> +            {
>>>>> +                ACCESS_ONCE(d->arch.paging.p2m_total_pages)--;
>>>>> +                free_domheap_page(pg);
>>>>> +            }
>>>>> +            else
>>>>> +            {
>>>>> +                printk(XENLOG_ERR
>>>>> +                       "Failed to free P2M pages, P2M freelist is empty.\n");
>>>>> +                return -ENOMEM;
>>>>> +            }
>>>>> +        }
>>>>> +        else
>>>>> +            break;
>>>>> +
>>>>> +        /* Check to see if we need to yield and try again */
>>>>> +        if ( preempted && general_preempt_check() )
>>>>> +        {
>>>>> +            *preempted = true;
>>>>> +            return -ERESTART;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>> Btw, with the order-2 requirement for the root page table, you may want to
>>>> consider an alternative approach: Here you could allocate some order-2
>>>> pages (possibly up to as many as a domain might need, which right now
>>>> would be exactly one), put them on a separate list, and consume the root
>>>> table(s) from there. If you run out of pages on the order-0 list, you
>>>> could shatter a page from the order-2 one (as long as that's still non-
>>>> empty). The difficulty would be with freeing, where a previously shattered
>>>> order-2 page would be nice to re-combine once all of its constituents are
>>>> free again.
>>> Do we really need to re-combine shattered order-2 pages?
>>> It seems like the only usage for this order-2-list is to have 1 order-2 page
>>> for root page table. All other pages are 4k pages so even if we won't re-combine
>>> them, nothing serious will happen.
>> That's true as long as you have only the host-P2M for each domain. Once you
>> have alternative or nested ones, things may change (unless they all have
>> their roots also set up right during domain creation, which would seem
>> wasteful to me).
> 
> I don't know how it is implemented on x86, but I thought that if it is needed alternative
> or nested P2Ms then it is needed to provide separated from host-P2M page tables (root page
> table including).
Correct, hence why you will then need to allocate multiple root tables.
Those secondary page tables are nevertheless all allocated from the
single pool that a domain has.
Jan
                
            
On 7/1/25 3:04 PM, Jan Beulich wrote:
> On 10.06.2025 15:05, Oleksii Kurochko wrote:
>> @@ -113,3 +117,58 @@ int p2m_init(struct domain *d)
>>   
>>       return 0;
>>   }
>> +
>> +/*
>> + * Set the pool of pages to the required number of pages.
>> + * Returns 0 for success, non-zero for failure.
>> + * Call with d->arch.paging.lock held.
>> + */
>> +int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
>> +{
>> +    struct page_info *pg;
>> +
>> +    ASSERT(spin_is_locked(&d->arch.paging.lock));
>> +
>> +    for ( ; ; )
>> +    {
>> +        if ( d->arch.paging.p2m_total_pages < pages )
>> +        {
>> +            /* Need to allocate more memory from domheap */
>> +            pg = alloc_domheap_page(d, MEMF_no_owner);
>> +            if ( pg == NULL )
>> +            {
>> +                printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
>> +                return -ENOMEM;
>> +            }
>> +            ACCESS_ONCE(d->arch.paging.p2m_total_pages)++;
>> +            page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
>> +        }
>> +        else if ( d->arch.paging.p2m_total_pages > pages )
>> +        {
>> +            /* Need to return memory to domheap */
>> +            pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
>> +            if( pg )
>> +            {
>> +                ACCESS_ONCE(d->arch.paging.p2m_total_pages)--;
>> +                free_domheap_page(pg);
>> +            }
>> +            else
>> +            {
>> +                printk(XENLOG_ERR
>> +                       "Failed to free P2M pages, P2M freelist is empty.\n");
>> +                return -ENOMEM;
>> +            }
>> +        }
>> +        else
>> +            break;
>> +
>> +        /* Check to see if we need to yield and try again */
>> +        if ( preempted && general_preempt_check() )
>> +        {
>> +            *preempted = true;
>> +            return -ERESTART;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
> Btw, with the order-2 requirement for the root page table, you may want to
> consider an alternative approach: Here you could allocate some order-2
> pages (possibly up to as many as a domain might need, which right now
> would be exactly one), put them on a separate list, and consume the root
> table(s) from there. If you run out of pages on the order-0 list, you
> could shatter a page from the order-2 one (as long as that's still non-
> empty). The difficulty would be with freeing, where a previously shattered
> order-2 page would be nice to re-combine once all of its constituents are
> free again. The main benefit would be avoiding the back and forth in patch
> 6.
It is an option.
But I'm still not 100% sure it's necessary to allocate the root page table
from the freelist. We could simply allocate the root page table from the
domheap (as is done for hardware domains) and reserve the freelist for other
pages.
The freelist is specific to Dom0less guest domains and is primarily used to
limit the amount of memory available for the guest—essentially for static
configurations where you want a clear and fixed limit on p2m allocations.
~ Oleksii

On 02.07.2025 12:30, Oleksii Kurochko wrote:
> 
> On 7/1/25 3:04 PM, Jan Beulich wrote:
>> On 10.06.2025 15:05, Oleksii Kurochko wrote:
>>> @@ -113,3 +117,58 @@ int p2m_init(struct domain *d)
>>>   
>>>       return 0;
>>>   }
>>> +
>>> +/*
>>> + * Set the pool of pages to the required number of pages.
>>> + * Returns 0 for success, non-zero for failure.
>>> + * Call with d->arch.paging.lock held.
>>> + */
>>> +int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
>>> +{
>>> +    struct page_info *pg;
>>> +
>>> +    ASSERT(spin_is_locked(&d->arch.paging.lock));
>>> +
>>> +    for ( ; ; )
>>> +    {
>>> +        if ( d->arch.paging.p2m_total_pages < pages )
>>> +        {
>>> +            /* Need to allocate more memory from domheap */
>>> +            pg = alloc_domheap_page(d, MEMF_no_owner);
>>> +            if ( pg == NULL )
>>> +            {
>>> +                printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
>>> +                return -ENOMEM;
>>> +            }
>>> +            ACCESS_ONCE(d->arch.paging.p2m_total_pages)++;
>>> +            page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
>>> +        }
>>> +        else if ( d->arch.paging.p2m_total_pages > pages )
>>> +        {
>>> +            /* Need to return memory to domheap */
>>> +            pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
>>> +            if( pg )
>>> +            {
>>> +                ACCESS_ONCE(d->arch.paging.p2m_total_pages)--;
>>> +                free_domheap_page(pg);
>>> +            }
>>> +            else
>>> +            {
>>> +                printk(XENLOG_ERR
>>> +                       "Failed to free P2M pages, P2M freelist is empty.\n");
>>> +                return -ENOMEM;
>>> +            }
>>> +        }
>>> +        else
>>> +            break;
>>> +
>>> +        /* Check to see if we need to yield and try again */
>>> +        if ( preempted && general_preempt_check() )
>>> +        {
>>> +            *preempted = true;
>>> +            return -ERESTART;
>>> +        }
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>> Btw, with the order-2 requirement for the root page table, you may want to
>> consider an alternative approach: Here you could allocate some order-2
>> pages (possibly up to as many as a domain might need, which right now
>> would be exactly one), put them on a separate list, and consume the root
>> table(s) from there. If you run out of pages on the order-0 list, you
>> could shatter a page from the order-2 one (as long as that's still non-
>> empty). The difficulty would be with freeing, where a previously shattered
>> order-2 page would be nice to re-combine once all of its constituents are
>> free again. The main benefit would be avoiding the back and forth in patch
>> 6.
> 
> It is an option.
> 
> But I'm still not 100% sure it's necessary to allocate the root page table
> from the freelist. We could simply allocate the root page table from the
> domheap (as is done for hardware domains) and reserve the freelist for other
> pages.
> The freelist is specific to Dom0less guest domains and is primarily used to
> limit the amount of memory available for the guest—essentially for static
> configurations where you want a clear and fixed limit on p2m allocations.
Is that true? My understanding is that this pre-populated pool is used by
all DomU-s, whether or not under dom0less.
Plus we're meaning to move towards better accounting of memory used by a
domain (besides its actual allocation). Allocating the root table from the
domain heap would move us one small step farther away from there.
Jan
                
            
On 7/2/25 12:34 PM, Jan Beulich wrote:
> On 02.07.2025 12:30, Oleksii Kurochko wrote:
>> On 7/1/25 3:04 PM, Jan Beulich wrote:
>>> On 10.06.2025 15:05, Oleksii Kurochko wrote:
>>>> @@ -113,3 +117,58 @@ int p2m_init(struct domain *d)
>>>>    
>>>>        return 0;
>>>>    }
>>>> +
>>>> +/*
>>>> + * Set the pool of pages to the required number of pages.
>>>> + * Returns 0 for success, non-zero for failure.
>>>> + * Call with d->arch.paging.lock held.
>>>> + */
>>>> +int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
>>>> +{
>>>> +    struct page_info *pg;
>>>> +
>>>> +    ASSERT(spin_is_locked(&d->arch.paging.lock));
>>>> +
>>>> +    for ( ; ; )
>>>> +    {
>>>> +        if ( d->arch.paging.p2m_total_pages < pages )
>>>> +        {
>>>> +            /* Need to allocate more memory from domheap */
>>>> +            pg = alloc_domheap_page(d, MEMF_no_owner);
>>>> +            if ( pg == NULL )
>>>> +            {
>>>> +                printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
>>>> +                return -ENOMEM;
>>>> +            }
>>>> +            ACCESS_ONCE(d->arch.paging.p2m_total_pages)++;
>>>> +            page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
>>>> +        }
>>>> +        else if ( d->arch.paging.p2m_total_pages > pages )
>>>> +        {
>>>> +            /* Need to return memory to domheap */
>>>> +            pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
>>>> +            if( pg )
>>>> +            {
>>>> +                ACCESS_ONCE(d->arch.paging.p2m_total_pages)--;
>>>> +                free_domheap_page(pg);
>>>> +            }
>>>> +            else
>>>> +            {
>>>> +                printk(XENLOG_ERR
>>>> +                       "Failed to free P2M pages, P2M freelist is empty.\n");
>>>> +                return -ENOMEM;
>>>> +            }
>>>> +        }
>>>> +        else
>>>> +            break;
>>>> +
>>>> +        /* Check to see if we need to yield and try again */
>>>> +        if ( preempted && general_preempt_check() )
>>>> +        {
>>>> +            *preempted = true;
>>>> +            return -ERESTART;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>> Btw, with the order-2 requirement for the root page table, you may want to
>>> consider an alternative approach: Here you could allocate some order-2
>>> pages (possibly up to as many as a domain might need, which right now
>>> would be exactly one), put them on a separate list, and consume the root
>>> table(s) from there. If you run out of pages on the order-0 list, you
>>> could shatter a page from the order-2 one (as long as that's still non-
>>> empty). The difficulty would be with freeing, where a previously shattered
>>> order-2 page would be nice to re-combine once all of its constituents are
>>> free again. The main benefit would be avoiding the back and forth in patch
>>> 6.
>> It is an option.
>>
>> But I'm still not 100% sure it's necessary to allocate the root page table
>> from the freelist. We could simply allocate the root page table from the
>> domheap (as is done for hardware domains) and reserve the freelist for other
>> pages.
>> The freelist is specific to Dom0less guest domains and is primarily used to
>> limit the amount of memory available for the guest—essentially for static
>> configurations where you want a clear and fixed limit on p2m allocations.
> Is that true? My understanding is that this pre-populated pool is used by
> all DomU-s, whether or not under dom0less.
I think you are right; I just assumed so because this pre-populated pool is
currently only set up in the dom0less path.
~ Oleksii
>
> Plus we're meaning to move towards better accounting of memory used by a
> domain (besides its actual allocation). Allocating the root table from the
> domain heap would move us one small step farther away from there.
>
> Jan

On 10.06.2025 15:05, Oleksii Kurochko wrote:
> @@ -18,10 +20,20 @@ struct arch_vcpu_io {
>  struct arch_vcpu {
>  };
>  
> +struct paging_domain {
> +    spinlock_t lock;
> +    /* Free P2M pages from the pre-allocated P2M pool */
> +    struct page_list_head p2m_freelist;
> +    /* Number of pages from the pre-allocated P2M pool */
> +    unsigned long p2m_total_pages;
> +};
> +
>  struct arch_domain {
>      struct hvm_domain hvm;
>  
>      struct p2m_domain p2m;
> +
> +    struct paging_domain paging;
With the separate structures, do you have plans to implement e.g. shadow paging?
Or some other paging mode beyond the basic one based on the H extension? If the
structures are to remain separate, may I suggest that you keep things properly
separated (no matter how e.g. Arm may have it) in terms of naming? I.e. no
single "p2m" inside struct paging_domain.
> @@ -105,6 +106,9 @@ int p2m_init(struct domain *d)
>      struct p2m_domain *p2m = p2m_get_hostp2m(d);
>      int rc;
>  
> +    spin_lock_init(&d->arch.paging.lock);
> +    INIT_PAGE_LIST_HEAD(&d->arch.paging.p2m_freelist);
If you want p2m and paging to be separate, you will want to put these in a new
paging_init().
Jan
                
            
On 6/18/25 5:53 PM, Jan Beulich wrote:
> On 10.06.2025 15:05, Oleksii Kurochko wrote:
>> @@ -18,10 +20,20 @@ struct arch_vcpu_io {
>>   struct arch_vcpu {
>>   };
>>   
>> +struct paging_domain {
>> +    spinlock_t lock;
>> +    /* Free P2M pages from the pre-allocated P2M pool */
>> +    struct page_list_head p2m_freelist;
>> +    /* Number of pages from the pre-allocated P2M pool */
>> +    unsigned long p2m_total_pages;
>> +};
>> +
>>   struct arch_domain {
>>       struct hvm_domain hvm;
>>   
>>       struct p2m_domain p2m;
>> +
>> +    struct paging_domain paging;
> With the separate structures, do you have plans to implement e.g. shadow paging?
> Or some other paging mode beyond the basic one based on the H extension?
No, there are no such plans.
>   If the
> structures are to remain separate, may I suggest that you keep things properly
> separated (no matter how e.g. Arm may have it) in terms of naming? I.e. no
> single "p2m" inside struct paging_domain.
Arm doesn't implement shadow paging either (AFAIK); this approach was probably
copied from x86 to Arm, and then to RISC-V.
I thought the reason for it was just to have two separate entities: one which
covers the page tables and one which covers the full available guest memory.
And if the only point of the split was shadow paging, then I don't know how it
should be done better. As the p2m code is based on Arm's, it perhaps makes
sense to keep this stuff separated, so porting will be easier.
>
>> @@ -105,6 +106,9 @@ int p2m_init(struct domain *d)
>>       struct p2m_domain *p2m = p2m_get_hostp2m(d);
>>       int rc;
>>   
>> +    spin_lock_init(&d->arch.paging.lock);
>> +    INIT_PAGE_LIST_HEAD(&d->arch.paging.p2m_freelist);
> If you want p2m and paging to be separate, you will want to put these in a new
> paging_init().
I don't really understand what is wrong with having it here, but that is likely
because I don't really get the initial purpose of keeping p2m and paging
separate. It seems like p2m and paging are tied to each other, so initializing
them together looks fine to me.
~ Oleksii

On 25.06.2025 16:48, Oleksii Kurochko wrote:
> 
> On 6/18/25 5:53 PM, Jan Beulich wrote:
>> On 10.06.2025 15:05, Oleksii Kurochko wrote:
>>> @@ -18,10 +20,20 @@ struct arch_vcpu_io {
>>>   struct arch_vcpu {
>>>   };
>>>   
>>> +struct paging_domain {
>>> +    spinlock_t lock;
>>> +    /* Free P2M pages from the pre-allocated P2M pool */
>>> +    struct page_list_head p2m_freelist;
>>> +    /* Number of pages from the pre-allocated P2M pool */
>>> +    unsigned long p2m_total_pages;
>>> +};
>>> +
>>>   struct arch_domain {
>>>       struct hvm_domain hvm;
>>>   
>>>       struct p2m_domain p2m;
>>> +
>>> +    struct paging_domain paging;
>> With the separate structures, do you have plans to implement e.g. shadow paging?
>> Or some other paging mode beyond the basic one based on the H extension?
> 
> No, there is no such plans.
> 
>>   If the
>> structures are to remain separate, may I suggest that you keep things properly
>> separated (no matter how e.g. Arm may have it) in terms of naming? I.e. no
>> single "p2m" inside struct paging_domain.
> 
> Arm doesn't implement shadow paging too (AFAIK) and probably this approach was
> copied from x86, and then to RISC-V.
> I thought that a reason for that was just to have two separate entities: one which
> covers page tables and which covers the full available guest memory.
> And if the only idea of that was to have shadow paging then I don't how it should
> be done better. As p2m code is based on Arm's, perhaps, it makes sense to have
> this stuff separated, so easier porting will be.
> 
>>> @@ -105,6 +106,9 @@ int p2m_init(struct domain *d)
>>>       struct p2m_domain *p2m = p2m_get_hostp2m(d);
>>>       int rc;
>>>   
>>> +    spin_lock_init(&d->arch.paging.lock);
>>> +    INIT_PAGE_LIST_HEAD(&d->arch.paging.p2m_freelist);
>> If you want p2m and paging to be separate, you will want to put these in a new
>> paging_init().
> 
> I am not really understand what is wrong to have it here, but likely it is because
> I don't really get an initial purpose of having p2m and paging separately.
> It seems like p2m and paging are connected between each other, so it is fine
> to init them together.
If you want to retain the separation, imo you want to follow what x86 has:
paging_domain_init() calling p2m_init(). And d->arch.paging.* would then
be initialized in paging_domain_init(), like x86 has it.
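
A minimal sketch of that layering for RISC-V, assuming a new
paging_domain_init() is introduced and called where p2m_init() is
called today:

/* Sketch only: function name and call site follow the x86 analogy. */
int paging_domain_init(struct domain *d)
{
    spin_lock_init(&d->arch.paging.lock);
    INIT_PAGE_LIST_HEAD(&d->arch.paging.p2m_freelist);

    /* p2m-specific state keeps being set up in p2m_init(). */
    return p2m_init(d);
}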
Jan
                