[PATCH v3 14/20] xen/riscv: Implement p2m_pte_from_mfn() and support PBMT configuration

Oleksii Kurochko posted 20 patches 3 months ago
There is a newer version of this series
[PATCH v3 14/20] xen/riscv: Implement p2m_pte_from_mfn() and support PBMT configuration
Posted by Oleksii Kurochko 3 months ago
This patch adds the initial logic for constructing PTEs from MFNs in the RISC-V
p2m subsystem. It includes:
- Implementation of p2m_pte_from_mfn(): Generates a valid PTE using the
  given MFN, p2m_type_t, including permission encoding and PBMT attribute
  setup.
- New helper p2m_set_permission(): Encodes access rights (r, w, x) into the
  PTE based on both p2m type and access permissions.
- p2m_set_type(): Stores the p2m type in PTE's bits. The storage of types,
  which don't fit PTE bits, will be implemented separately later.

PBMT type encoding support:
- Introduces an enum pbmt_type_t to represent the PBMT field values.
- Maps types like p2m_mmio_direct_dev to p2m_mmio_direct_io, others default
  to pbmt_pma.

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V3:
 - s/p2m_entry_from_mfn/p2m_pte_from_mfn.
 - s/pbmt_type_t/pbmt_type.
 - s/pbmt_max/pbmt_count.
 - s/p2m_type_radix_set/p2m_set_type.
 - Rework p2m_set_type() to handle only types which are fited into PTEs bits.
   Other types will be covered separately.
   Update arguments of p2m_set_type(): there is no any reason for p2m anymore.
 - p2m_set_permissions() updates:
   - Update the code in p2m_set_permission() for cases p2m_raw_rw and
     p2m_mmio_direct_io to set proper type permissions.
   - Add cases for p2m_grant_map_rw and p2m_grant_map_ro.
   - Use ASSERT_UNEACHABLE() instead of BUG() in switch cases of
     p2m_set_permissions.
   - Add blank lines non-fall-through case blocks in switch cases.
 - Set MFN before permissions are set in p2m_pte_from_mfn().
 - Update prototype of p2m_entry_from_mfn().
---
Changes in V2:
 - New patch. It was a part of a big patch "xen/riscv: implement p2m mapping
   functionality" which was splitted to smaller.
---
 xen/arch/riscv/include/asm/page.h |  8 +++
 xen/arch/riscv/p2m.c              | 81 +++++++++++++++++++++++++++++--
 2 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index cb303af0c0..4fa0556073 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -74,6 +74,14 @@
 #define PTE_SMALL       BIT(10, UL)
 #define PTE_POPULATE    BIT(11, UL)
 
+enum pbmt_type {
+    pbmt_pma,
+    pbmt_nc,
+    pbmt_io,
+    pbmt_rsvd,
+    pbmt_count,
+};
+
 #define PTE_ACCESS_MASK (PTE_READABLE | PTE_WRITABLE | PTE_EXECUTABLE)
 
 #define PTE_PBMT_MASK   (PTE_PBMT_NOCACHE | PTE_PBMT_IO)
diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c
index 2467e459cc..efc7320619 100644
--- a/xen/arch/riscv/p2m.c
+++ b/xen/arch/riscv/p2m.c
@@ -1,3 +1,4 @@
+#include <xen/bug.h>
 #include <xen/domain_page.h>
 #include <xen/mm.h>
 #include <xen/rwlock.h>
@@ -197,6 +198,18 @@ static pte_t *p2m_get_root_pointer(struct p2m_domain *p2m, gfn_t gfn)
     return __map_domain_page(p2m->root + root_table_indx);
 }
 
+static int p2m_set_type(pte_t *pte, p2m_type_t t)
+{
+    int rc = 0;
+
+    if ( t > p2m_ext_storage )
+        panic("unimplemeted\n");
+    else
+        pte->pte |= MASK_INSR(t, P2M_TYPE_PTE_BITS_MASK);
+
+    return rc;
+}
+
 static p2m_type_t p2m_get_type(const pte_t pte)
 {
     p2m_type_t type = MASK_EXTR(pte.pte, P2M_TYPE_PTE_BITS_MASK);
@@ -222,11 +235,71 @@ static inline void p2m_clean_pte(pte_t *p, bool clean_pte)
     p2m_write_pte(p, pte, clean_pte);
 }
 
-static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t)
+static void p2m_set_permission(pte_t *e, p2m_type_t t)
 {
-    panic("%s: hasn't been implemented yet\n", __func__);
+    e->pte &= ~PTE_ACCESS_MASK;
+
+    switch ( t )
+    {
+    case p2m_grant_map_rw:
+    case p2m_ram_rw:
+        e->pte |= PTE_READABLE | PTE_WRITABLE;
+        break;
+
+    case p2m_ext_storage:
+    case p2m_mmio_direct_io:
+        e->pte |= PTE_ACCESS_MASK;
+        break;
+
+    case p2m_invalid:
+        e->pte &= ~(PTE_ACCESS_MASK | PTE_VALID);
+        break;
+
+    case p2m_grant_map_ro:
+        e->pte |= PTE_READABLE;
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        break;
+    }
+}
+
+static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, bool is_table)
+{
+    pte_t e = (pte_t) { PTE_VALID };
+
+    switch ( t )
+    {
+    case p2m_mmio_direct_io:
+        e.pte |= PTE_PBMT_IO;
+        break;
+
+    default:
+        break;
+    }
+
+    pte_set_mfn(&e, mfn);
+
+    ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));
+
+    if ( !is_table )
+    {
+        p2m_set_permission(&e, t);
+
+        if ( t < p2m_ext_storage )
+            p2m_set_type(&e, t);
+        else
+            panic("unimplemeted\n");
+    }
+    else
+        /*
+         * According to the spec and table "Encoding of PTE R/W/X fields":
+         *   X=W=R=0 -> Pointer to next level of page table.
+         */
+        e.pte &= ~PTE_ACCESS_MASK;
 
-    return (pte_t) { .pte = 0 };
+    return e;
 }
 
 #define P2M_TABLE_MAP_NONE 0
@@ -469,7 +542,7 @@ static int p2m_set_entry(struct p2m_domain *p2m,
         p2m_clean_pte(entry, p2m->clean_pte);
     else
     {
-        pte_t pte = p2m_pte_from_mfn(mfn, t);
+        pte_t pte = p2m_pte_from_mfn(mfn, t, false);
 
         p2m_write_pte(entry, pte, p2m->clean_pte);
 
-- 
2.50.1
Re: [PATCH v3 14/20] xen/riscv: Implement p2m_pte_from_mfn() and support PBMT configuration
Posted by Jan Beulich 2 months, 2 weeks ago
On 31.07.2025 17:58, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/p2m.c
> +++ b/xen/arch/riscv/p2m.c
> @@ -1,3 +1,4 @@
> +#include <xen/bug.h>
>  #include <xen/domain_page.h>
>  #include <xen/mm.h>
>  #include <xen/rwlock.h>
> @@ -197,6 +198,18 @@ static pte_t *p2m_get_root_pointer(struct p2m_domain *p2m, gfn_t gfn)
>      return __map_domain_page(p2m->root + root_table_indx);
>  }
>  
> +static int p2m_set_type(pte_t *pte, p2m_type_t t)
> +{
> +    int rc = 0;
> +
> +    if ( t > p2m_ext_storage )

Seeing this separator enumerator in use, it becomes pretty clear that its name
needs to change, so one doesn't need to go look at its definition to understand
whether it's inclusive or exclusive. (This isn't helped by there presently being
a spare entry, which, when made use of, might then cause problems with
expressions like this one as well.)

> @@ -222,11 +235,71 @@ static inline void p2m_clean_pte(pte_t *p, bool clean_pte)
>      p2m_write_pte(p, pte, clean_pte);
>  }
>  
> -static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t)
> +static void p2m_set_permission(pte_t *e, p2m_type_t t)
>  {
> -    panic("%s: hasn't been implemented yet\n", __func__);
> +    e->pte &= ~PTE_ACCESS_MASK;
> +
> +    switch ( t )
> +    {
> +    case p2m_grant_map_rw:
> +    case p2m_ram_rw:
> +        e->pte |= PTE_READABLE | PTE_WRITABLE;
> +        break;

While I agree for r/w grants, shouldn't r/w RAM also be executable?

> +    case p2m_ext_storage:

Why exactly would this placeholder ...

> +    case p2m_mmio_direct_io:
> +        e->pte |= PTE_ACCESS_MASK;
> +        break;

... gain full access? It shouldn't make it here at all, should it?

> +
> +    case p2m_invalid:
> +        e->pte &= ~(PTE_ACCESS_MASK | PTE_VALID);

Redundantly masking off PTE_ACCESS_MASK? (Plus, for the entry to be
invalid, turning off PTE_VALID alone ought to suffice anyway?)

> +        break;
> +
> +    case p2m_grant_map_ro:
> +        e->pte |= PTE_READABLE;
> +        break;
> +
> +    default:
> +        ASSERT_UNREACHABLE();
> +        break;
> +    }
> +}
> +
> +static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, bool is_table)
> +{
> +    pte_t e = (pte_t) { PTE_VALID };

This and the rest of the function demand that mfn != INVALID_MFN, no matter
whether ...

> +    switch ( t )
> +    {
> +    case p2m_mmio_direct_io:
> +        e.pte |= PTE_PBMT_IO;
> +        break;
> +
> +    default:
> +        break;
> +    }
> +
> +    pte_set_mfn(&e, mfn);
> +
> +    ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));

... PADDR_MASK is actually narrow enough to catch that case. Maybe best to
add an explicit assertion to that effect?

> +    if ( !is_table )
> +    {
> +        p2m_set_permission(&e, t);
> +
> +        if ( t < p2m_ext_storage )
> +            p2m_set_type(&e, t);
> +        else
> +            panic("unimplemeted\n");

The check is already done inside p2m_set_type() - why open-code it here?

Jan
Re: [PATCH v3 14/20] xen/riscv: Implement p2m_pte_from_mfn() and support PBMT configuration
Posted by Oleksii Kurochko 2 months, 2 weeks ago
On 8/11/25 1:36 PM, Jan Beulich wrote:
> On 31.07.2025 17:58, Oleksii Kurochko wrote:
>> --- a/xen/arch/riscv/p2m.c
>> +++ b/xen/arch/riscv/p2m.c
>> @@ -1,3 +1,4 @@
>> +#include <xen/bug.h>
>>   #include <xen/domain_page.h>
>>   #include <xen/mm.h>
>>   #include <xen/rwlock.h>
>> @@ -197,6 +198,18 @@ static pte_t *p2m_get_root_pointer(struct p2m_domain *p2m, gfn_t gfn)
>>       return __map_domain_page(p2m->root + root_table_indx);
>>   }
>>   
>> +static int p2m_set_type(pte_t *pte, p2m_type_t t)
>> +{
>> +    int rc = 0;
>> +
>> +    if ( t > p2m_ext_storage )
> Seeing this separator enumerator in use, it becomes pretty clear that its name
> needs to change, so one doesn't need to go look at its definition to understand
> whether it's inclusive or exclusive. (This isn't helped by there presently being
> a spare entry, which, when made use of, might then cause problems with
> expressions like this one as well.)

Then|p2m_pte_type_count| might be a better name, as it indicates how many types are
stored directly in the PTE bits.

>
>> @@ -222,11 +235,71 @@ static inline void p2m_clean_pte(pte_t *p, bool clean_pte)
>>       p2m_write_pte(p, pte, clean_pte);
>>   }
>>   
>> -static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t)
>> +static void p2m_set_permission(pte_t *e, p2m_type_t t)
>>   {
>> -    panic("%s: hasn't been implemented yet\n", __func__);
>> +    e->pte &= ~PTE_ACCESS_MASK;
>> +
>> +    switch ( t )
>> +    {
>> +    case p2m_grant_map_rw:
>> +    case p2m_ram_rw:
>> +        e->pte |= PTE_READABLE | PTE_WRITABLE;
>> +        break;
> While I agree for r/w grants, shouldn't r/w RAM also be executable?
>
>> +    case p2m_ext_storage:
> Why exactly would this placeholder ...
>
>> +    case p2m_mmio_direct_io:
>> +        e->pte |= PTE_ACCESS_MASK;
>> +        break;
> ... gain full access? It shouldn't make it here at all, should it?

I missed to add break between them, but I don't remember why I
put it here.
It could be freely moved before "default".

And, yes, you are right it seems like is shouldn't be handled at all
in this function as this function isn't expected to be called with
this type as this type only is used to indicate that a real type is
stored somwehere.

>
>> +
>> +    case p2m_invalid:
>> +        e->pte &= ~(PTE_ACCESS_MASK | PTE_VALID);
> Redundantly masking off PTE_ACCESS_MASK? (Plus, for the entry to be
> invalid, turning off PTE_VALID alone ought to suffice anyway?)

Agree, turning off PTE_VALID would be just enough.

>> +        break;
>> +
>> +    case p2m_grant_map_ro:
>> +        e->pte |= PTE_READABLE;
>> +        break;
>> +
>> +    default:
>> +        ASSERT_UNREACHABLE();
>> +        break;
>> +    }
>> +}
>> +
>> +static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, bool is_table)
>> +{
>> +    pte_t e = (pte_t) { PTE_VALID };
> This and the rest of the function demand that mfn != INVALID_MFN, no matter
> whether ...
>
>> +    switch ( t )
>> +    {
>> +    case p2m_mmio_direct_io:
>> +        e.pte |= PTE_PBMT_IO;
>> +        break;
>> +
>> +    default:
>> +        break;
>> +    }
>> +
>> +    pte_set_mfn(&e, mfn);
>> +
>> +    ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));
> ... PADDR_MASK is actually narrow enough to catch that case. Maybe best to
> add an explicit assertion to that effect?

Then it should be enough instead of what we have now:
   ASSERT(mfn_valid(mfn));

>
>> +    if ( !is_table )
>> +    {
>> +        p2m_set_permission(&e, t);
>> +
>> +        if ( t < p2m_ext_storage )
>> +            p2m_set_type(&e, t);
>> +        else
>> +            panic("unimplemeted\n");
> The check is already done inside p2m_set_type() - why open-code it here?

It isn't really matters now (so could be dropped), but in further patch this part
of code will look like:
         metadata[indx].pte = p2m_invalid;

         if ( t < p2m_ext_storage )
             p2m_set_type(&e, t, indx);
         else
         {
             e.pte |= MASK_INSR(p2m_ext_storage, P2M_TYPE_PTE_BITS_MASK);
             p2m_set_type(metadata, t, indx);
         }
So my intention was to re-use p2m_set_type() without changing of a prototype. So,
if a type is stored in PTE bits then we pass PTE directly, if not - then pass
metadata.

Thanks.

~ Oleksii
Re: [PATCH v3 14/20] xen/riscv: Implement p2m_pte_from_mfn() and support PBMT configuration
Posted by Jan Beulich 2 months, 2 weeks ago
On 11.08.2025 16:44, Oleksii Kurochko wrote:
> On 8/11/25 1:36 PM, Jan Beulich wrote:
>> On 31.07.2025 17:58, Oleksii Kurochko wrote:
>>> +static pte_t p2m_pte_from_mfn(mfn_t mfn, p2m_type_t t, bool is_table)
>>> +{
>>> +    pte_t e = (pte_t) { PTE_VALID };
>> This and the rest of the function demand that mfn != INVALID_MFN, no matter
>> whether ...
>>
>>> +    switch ( t )
>>> +    {
>>> +    case p2m_mmio_direct_io:
>>> +        e.pte |= PTE_PBMT_IO;
>>> +        break;
>>> +
>>> +    default:
>>> +        break;
>>> +    }
>>> +
>>> +    pte_set_mfn(&e, mfn);
>>> +
>>> +    ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));
>> ... PADDR_MASK is actually narrow enough to catch that case. Maybe best to
>> add an explicit assertion to that effect?
> 
> Then it should be enough instead of what we have now:
>    ASSERT(mfn_valid(mfn));

No, that would exclude MMIO living beyond max_page.

>>> +    if ( !is_table )
>>> +    {
>>> +        p2m_set_permission(&e, t);
>>> +
>>> +        if ( t < p2m_ext_storage )
>>> +            p2m_set_type(&e, t);
>>> +        else
>>> +            panic("unimplemeted\n");
>> The check is already done inside p2m_set_type() - why open-code it here?
> 
> It isn't really matters now (so could be dropped), but in further patch this part
> of code will look like:
>          metadata[indx].pte = p2m_invalid;
> 
>          if ( t < p2m_ext_storage )
>              p2m_set_type(&e, t, indx);
>          else
>          {
>              e.pte |= MASK_INSR(p2m_ext_storage, P2M_TYPE_PTE_BITS_MASK);
>              p2m_set_type(metadata, t, indx);
>          }
> So my intention was to re-use p2m_set_type() without changing of a prototype. So,
> if a type is stored in PTE bits then we pass PTE directly, if not - then pass
> metadata.

Then at the very least p2m_set_type() may not be a good name; a function of this
name imo should set the type, whatever it takes to do so. But I'm unconvinced of
the model as a whole.

Jan