[PATCH v2 03/16] xen/riscv: introduce support of Svpbmt extension

Oleksii Kurochko posted 16 patches 5 months, 4 weeks ago
Only 14 patches received!
There is a newer version of this series
[PATCH v2 03/16] xen/riscv: introduce support of Svpbmt extension
Posted by Oleksii Kurochko 5 months, 4 weeks ago
Svpbmt extension is necessary for chaning the memory type for a page contains
a combination of attributes that indicate the cacheability, idempotency,
and ordering properties for access to that page.

As a part of the patch the following is introduced:
- Svpbmt memory type defintions: PTE_PBMT_{NOCACHE,IO}.
- PAGE_HYPERVISOR_{NOCACHE,WC}.
- RISCV_ISA_EXT_svpbmt and add a check in runtime that Svpbmt is
  supported by platform.
- Update riscv/booting.txt with information about Svpbmt.
- Update logic of pt_update_entry() to take into account PBMT bits.

Use 'unsigned long' for pte_attr_t as PMBT bits are 61 and 62 and it doesn't
fit into 'unsigned int'. Also, update function prototypes which uses
'unsigned int' for flags/attibutes.

Enable Svpbmt for testing in QEMU as Svpmbt is now mandatory for
Xen work.

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in v2:
 - new patch.
---
 automation/scripts/qemu-smoke-riscv64.sh |  1 +
 docs/misc/riscv/booting.txt              |  4 ++++
 xen/arch/riscv/Kconfig                   | 14 ++++++++++++++
 xen/arch/riscv/cpufeature.c              |  2 ++
 xen/arch/riscv/include/asm/cpufeature.h  |  1 +
 xen/arch/riscv/include/asm/fixmap.h      |  2 +-
 xen/arch/riscv/include/asm/mm-types.h    |  8 ++++++++
 xen/arch/riscv/include/asm/page.h        | 17 ++++++++++++++++-
 xen/arch/riscv/pt.c                      | 20 +++++++++++---------
 9 files changed, 58 insertions(+), 11 deletions(-)
 create mode 100644 xen/arch/riscv/include/asm/mm-types.h

diff --git a/automation/scripts/qemu-smoke-riscv64.sh b/automation/scripts/qemu-smoke-riscv64.sh
index b2e112c942..25f9e4190e 100755
--- a/automation/scripts/qemu-smoke-riscv64.sh
+++ b/automation/scripts/qemu-smoke-riscv64.sh
@@ -7,6 +7,7 @@ rm -f smoke.serial
 
 export TEST_CMD="qemu-system-riscv64 \
     -M virt,aia=aplic-imsic \
+    -cpu rv64,svpbmt=on \
     -smp 1 \
     -nographic \
     -m 2g \
diff --git a/docs/misc/riscv/booting.txt b/docs/misc/riscv/booting.txt
index 3a8474a27d..e100bde575 100644
--- a/docs/misc/riscv/booting.txt
+++ b/docs/misc/riscv/booting.txt
@@ -18,3 +18,7 @@ Xen is run:
 - Zihintpause:
   On a system that doesn't have this extension, cpu_relax() should be
   implemented properly.
+- SVPBMT is mandatory to enable changing the memory attributes of a page.
+  For platforms that do not support SVPBMT, it is necessary to introduce a
+  similar mechanism as described in:
+  https://lore.kernel.org/all/20241102000843.1301099-1-samuel.holland@sifive.com/
diff --git a/xen/arch/riscv/Kconfig b/xen/arch/riscv/Kconfig
index d882e0a059..60520dab57 100644
--- a/xen/arch/riscv/Kconfig
+++ b/xen/arch/riscv/Kconfig
@@ -10,11 +10,25 @@ config RISCV
 config RISCV_64
 	def_bool y
 	select 64BIT
+	select HAS_SVPBMT
 
 config ARCH_DEFCONFIG
 	string
 	default "arch/riscv/configs/tiny64_defconfig"
 
+config HAS_SVPBMT
+	bool
+	depends on RISCV_64
+	help
+	  This config enables usage of Svpbmt ISA-extension ( Supervisor-mode:
+	  page-based memory types).
+
+	  The memory type for a page contains a combination of attributes
+	  that indicate the cacheability, idempotency, and ordering
+	  properties for access to that page.
+
+	  The Svpbmt extension is only available on 64-bit cpus.
+
 menu "Architecture Features"
 
 source "arch/Kconfig"
diff --git a/xen/arch/riscv/cpufeature.c b/xen/arch/riscv/cpufeature.c
index 3246a03624..b7d5ec6580 100644
--- a/xen/arch/riscv/cpufeature.c
+++ b/xen/arch/riscv/cpufeature.c
@@ -137,6 +137,7 @@ const struct riscv_isa_ext_data __initconst riscv_isa_ext[] = {
     RISCV_ISA_EXT_DATA(zbs),
     RISCV_ISA_EXT_DATA(smaia),
     RISCV_ISA_EXT_DATA(ssaia),
+    RISCV_ISA_EXT_DATA(svpbmt),
 };
 
 static const struct riscv_isa_ext_data __initconst required_extensions[] = {
@@ -151,6 +152,7 @@ static const struct riscv_isa_ext_data __initconst required_extensions[] = {
     RISCV_ISA_EXT_DATA(zifencei),
     RISCV_ISA_EXT_DATA(zihintpause),
     RISCV_ISA_EXT_DATA(zbb),
+    RISCV_ISA_EXT_DATA(svpbmt),
 };
 
 static bool __init is_lowercase_extension_name(const char *str)
diff --git a/xen/arch/riscv/include/asm/cpufeature.h b/xen/arch/riscv/include/asm/cpufeature.h
index 1015b6ee44..768b84b769 100644
--- a/xen/arch/riscv/include/asm/cpufeature.h
+++ b/xen/arch/riscv/include/asm/cpufeature.h
@@ -37,6 +37,7 @@ enum riscv_isa_ext_id {
     RISCV_ISA_EXT_zbs,
     RISCV_ISA_EXT_smaia,
     RISCV_ISA_EXT_ssaia,
+    RISCV_ISA_EXT_svpbmt,
     RISCV_ISA_EXT_MAX
 };
 
diff --git a/xen/arch/riscv/include/asm/fixmap.h b/xen/arch/riscv/include/asm/fixmap.h
index e399a15f53..5990c964aa 100644
--- a/xen/arch/riscv/include/asm/fixmap.h
+++ b/xen/arch/riscv/include/asm/fixmap.h
@@ -33,7 +33,7 @@
 extern pte_t xen_fixmap[];
 
 /* Map a page in a fixmap entry */
-void set_fixmap(unsigned int map, mfn_t mfn, unsigned int flags);
+void set_fixmap(unsigned int map, mfn_t mfn, pte_attr_t flags);
 /* Remove a mapping from a fixmap entry */
 void clear_fixmap(unsigned int map);
 
diff --git a/xen/arch/riscv/include/asm/mm-types.h b/xen/arch/riscv/include/asm/mm-types.h
new file mode 100644
index 0000000000..fa512064b8
--- /dev/null
+++ b/xen/arch/riscv/include/asm/mm-types.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef ASM_RISCV_MM_TYPES_H
+#define ASM_RISCV_MM_TYPES_H
+
+typedef unsigned long pte_attr_t;
+
+#endif /* ASM_RISCV_MM_TYPES_H */
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index bf8988f657..2af4823170 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -46,6 +46,8 @@
 #define PAGE_HYPERVISOR_RX          (PTE_VALID | PTE_READABLE | PTE_EXECUTABLE)
 
 #define PAGE_HYPERVISOR             PAGE_HYPERVISOR_RW
+#define PAGE_HYPERVISOR_NOCACHE     (PAGE_HYPERVISOR_RW | PTE_PMBT_IO)
+#define PAGE_HYPERVISOR_WC          (PAGE_HYPERVISOR_RW | PTE_PMBT_NOCACHE)
 
 /*
  * The PTE format does not contain the following bits within itself;
@@ -56,8 +58,21 @@
 #define PTE_SMALL       BIT(10, UL)
 #define PTE_POPULATE    BIT(11, UL)
 
+/*
+ * [62:61] Svpbmt Memory Type definitions:
+ *
+ *  00 - PMA    Normal Cacheable, No change to implied PMA memory type
+ *  01 - NC     Non-cacheable, idempotent, weakly-ordered Main Memory
+ *  10 - IO     Non-cacheable, non-idempotent, strongly-ordered I/O memory
+ *  11 - Rsvd   Reserved for future standard use
+ */
+#define PTE_PMBT_NOCACHE    BIT(61, UL)
+#define PTE_PMBT_IO         BIT(62, UL)
+
 #define PTE_ACCESS_MASK (PTE_READABLE | PTE_WRITABLE | PTE_EXECUTABLE)
 
+#define PTE_PBMT_MASK   (PTE_PMBT_NOCACHE | PTE_PMBT_IO)
+
 /* Calculate the offsets into the pagetables for a given VA */
 #define pt_linear_offset(lvl, va)   ((va) >> XEN_PT_LEVEL_SHIFT(lvl))
 
@@ -202,7 +217,7 @@ static inline pte_t read_pte(const pte_t *p)
     return read_atomic(p);
 }
 
-static inline pte_t pte_from_mfn(mfn_t mfn, unsigned int flags)
+static inline pte_t pte_from_mfn(mfn_t mfn, pte_attr_t flags)
 {
     unsigned long pte = (mfn_x(mfn) << PTE_PPN_SHIFT) | flags;
     return (pte_t){ .pte = pte };
diff --git a/xen/arch/riscv/pt.c b/xen/arch/riscv/pt.c
index 918b1b91ab..82c8c73c3e 100644
--- a/xen/arch/riscv/pt.c
+++ b/xen/arch/riscv/pt.c
@@ -25,7 +25,7 @@ static inline mfn_t get_root_page(void)
  * See the comment about the possible combination of (mfn, flags) in
  * the comment above pt_update().
  */
-static bool pt_check_entry(pte_t entry, mfn_t mfn, unsigned int flags)
+static bool pt_check_entry(pte_t entry, mfn_t mfn, pte_attr_t flags)
 {
     /* Sanity check when modifying an entry. */
     if ( (flags & PTE_VALID) && mfn_eq(mfn, INVALID_MFN) )
@@ -260,7 +260,7 @@ pte_t pt_walk(vaddr_t va, unsigned int *pte_level)
  */
 static int pt_update_entry(mfn_t root, vaddr_t virt,
                            mfn_t mfn, unsigned int *target,
-                           unsigned int flags)
+                           pte_attr_t flags)
 {
     int rc;
     /*
@@ -328,17 +328,19 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
         pte.pte = 0;
     else
     {
+        const pte_attr_t attrs = PTE_ACCESS_MASK | PTE_PBMT_MASK;
+
         /* We are inserting a mapping => Create new pte. */
         if ( !mfn_eq(mfn, INVALID_MFN) )
             pte = pte_from_mfn(mfn, PTE_VALID);
-        else /* We are updating the permission => Copy the current pte. */
+        else /* We are updating the attributes => Copy the current pte. */
         {
             pte = *ptep;
-            pte.pte &= ~PTE_ACCESS_MASK;
+            pte.pte &= ~attrs;
         }
 
-        /* update permission according to the flags */
-        pte.pte |= (flags & PTE_ACCESS_MASK) | PTE_ACCESSED | PTE_DIRTY;
+        /* Update attributes of PTE according to the flags. */
+        pte.pte |= (flags & attrs) | PTE_ACCESSED | PTE_DIRTY;
     }
 
     write_pte(ptep, pte);
@@ -353,7 +355,7 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
 
 /* Return the level where mapping should be done */
 static int pt_mapping_level(unsigned long vfn, mfn_t mfn, unsigned long nr,
-                            unsigned int flags)
+                            pte_attr_t flags)
 {
     unsigned int level = 0;
     unsigned long mask;
@@ -407,7 +409,7 @@ static DEFINE_SPINLOCK(pt_lock);
  * inserting will be done.
  */
 static int pt_update(vaddr_t virt, mfn_t mfn,
-                     unsigned long nr_mfns, unsigned int flags)
+                     unsigned long nr_mfns, pte_attr_t flags)
 {
     int rc = 0;
     unsigned long vfn = PFN_DOWN(virt);
@@ -535,7 +537,7 @@ int __init populate_pt_range(unsigned long virt, unsigned long nr_mfns)
 }
 
 /* Map a 4k page in a fixmap entry */
-void set_fixmap(unsigned int map, mfn_t mfn, unsigned int flags)
+void set_fixmap(unsigned int map, mfn_t mfn, pte_attr_t flags)
 {
     if ( map_pages_to_xen(FIXMAP_ADDR(map), mfn, 1, flags | PTE_SMALL) != 0 )
         BUG();
-- 
2.49.0
Re: [PATCH v2 03/16] xen/riscv: introduce support of Svpbmt extension
Posted by Jan Beulich 5 months, 3 weeks ago
On 06.05.2025 18:51, Oleksii Kurochko wrote:
> Svpbmt extension is necessary for chaning the memory type for a page contains
> a combination of attributes that indicate the cacheability, idempotency,
> and ordering properties for access to that page.

The title suggest use of the extension is optional.

> --- a/xen/arch/riscv/Kconfig
> +++ b/xen/arch/riscv/Kconfig
> @@ -10,11 +10,25 @@ config RISCV
>  config RISCV_64
>  	def_bool y
>  	select 64BIT
> +	select HAS_SVPBMT

Such redundant ...

>  config ARCH_DEFCONFIG
>  	string
>  	default "arch/riscv/configs/tiny64_defconfig"
>  
> +config HAS_SVPBMT
> +	bool
> +	depends on RISCV_64

... dependencies are frowned upon, afaik. And it's pretty certainly not
needed here.

> +	help
> +	  This config enables usage of Svpbmt ISA-extension ( Supervisor-mode:
> +	  page-based memory types).
> +
> +	  The memory type for a page contains a combination of attributes
> +	  that indicate the cacheability, idempotency, and ordering
> +	  properties for access to that page.
> +
> +	  The Svpbmt extension is only available on 64-bit cpus.

I don't mind the help text, but for a prompt-less option it's of little
use (beyond what a comment could also achieve).

> --- a/xen/arch/riscv/include/asm/page.h
> +++ b/xen/arch/riscv/include/asm/page.h
> @@ -46,6 +46,8 @@
>  #define PAGE_HYPERVISOR_RX          (PTE_VALID | PTE_READABLE | PTE_EXECUTABLE)
>  
>  #define PAGE_HYPERVISOR             PAGE_HYPERVISOR_RW
> +#define PAGE_HYPERVISOR_NOCACHE     (PAGE_HYPERVISOR_RW | PTE_PMBT_IO)
> +#define PAGE_HYPERVISOR_WC          (PAGE_HYPERVISOR_RW | PTE_PMBT_NOCACHE)

Hmm, odd - NOCACHE doesn't really mean "no cache" then? I think this
would require a comment then.

> @@ -56,8 +58,21 @@
>  #define PTE_SMALL       BIT(10, UL)
>  #define PTE_POPULATE    BIT(11, UL)
>  
> +/*
> + * [62:61] Svpbmt Memory Type definitions:
> + *
> + *  00 - PMA    Normal Cacheable, No change to implied PMA memory type
> + *  01 - NC     Non-cacheable, idempotent, weakly-ordered Main Memory
> + *  10 - IO     Non-cacheable, non-idempotent, strongly-ordered I/O memory
> + *  11 - Rsvd   Reserved for future standard use
> + */
> +#define PTE_PMBT_NOCACHE    BIT(61, UL)
> +#define PTE_PMBT_IO         BIT(62, UL)

Unlike PTE_SMALL and PTE_POPULATE these are arch-defined; I think they
want to move up to where the other arch-defined bits are, thus also
maping them appear before their first use.

Jan
Re: [PATCH v2 03/16] xen/riscv: introduce support of Svpbmt extension
Posted by Oleksii Kurochko 5 months, 2 weeks ago
On 5/13/25 6:00 PM, Jan Beulich wrote:
> On 06.05.2025 18:51, Oleksii Kurochko wrote:
>> Svpbmt extension is necessary for chaning the memory type for a page contains
>> a combination of attributes that indicate the cacheability, idempotency,
>> and ordering properties for access to that page.
> The title suggest use of the extension is optional.
>
>> --- a/xen/arch/riscv/Kconfig
>> +++ b/xen/arch/riscv/Kconfig
>> @@ -10,11 +10,25 @@ config RISCV
>>   config RISCV_64
>>   	def_bool y
>>   	select 64BIT
>> +	select HAS_SVPBMT
> Such redundant ...
>
>>   config ARCH_DEFCONFIG
>>   	string
>>   	default "arch/riscv/configs/tiny64_defconfig"
>>   
>> +config HAS_SVPBMT
>> +	bool
>> +	depends on RISCV_64
> ... dependencies are frowned upon, afaik. And it's pretty certainly not
> needed here.
>
>> +	help
>> +	  This config enables usage of Svpbmt ISA-extension ( Supervisor-mode:
>> +	  page-based memory types).
>> +
>> +	  The memory type for a page contains a combination of attributes
>> +	  that indicate the cacheability, idempotency, and ordering
>> +	  properties for access to that page.
>> +
>> +	  The Svpbmt extension is only available on 64-bit cpus.
> I don't mind the help text, but for a prompt-less option it's of little
> use (beyond what a comment could also achieve).

I'll drop "depends on RISCV_64" for HAS_SVOBMT and leave only 'select HAS_SVPBMT'
and move the help text to commit messaage.

>
>> --- a/xen/arch/riscv/include/asm/page.h
>> +++ b/xen/arch/riscv/include/asm/page.h
>> @@ -46,6 +46,8 @@
>>   #define PAGE_HYPERVISOR_RX          (PTE_VALID | PTE_READABLE | PTE_EXECUTABLE)
>>   
>>   #define PAGE_HYPERVISOR             PAGE_HYPERVISOR_RW
>> +#define PAGE_HYPERVISOR_NOCACHE     (PAGE_HYPERVISOR_RW | PTE_PMBT_IO)
>> +#define PAGE_HYPERVISOR_WC          (PAGE_HYPERVISOR_RW | PTE_PMBT_NOCACHE)
> Hmm, odd - NOCACHE doesn't really mean "no cache" then? I think this
> would require a comment then.

According to the table (Svpbmt Memory Type definitions) in the comment below these
definitions both of them (PTE_PMBT_IO and PTE_PMBT_NOCACHE) are non-cachable.

I wasn't sure what and for what should be used so I did in sync with Arm which
defines "#define PAGE_HYPERVISOR_NOCACHE (_PAGE_DEVICE|MT_DEVICE_nGnRE)" where
MT_DEVICE_nGnRE is equivalent of PTE_PMBT_IO.

I can add the comment above definition of PAGE_HYPERVISOR_NOCACHE:
/* Non-cacheable, non-idempotent, strongly-ordered I/O memory */.

Something similar then I can add for PAGE_HYPERVISOR_WC:
/* Non-cacheable, idempotent, weakly-ordered Main Memory */

>
>> @@ -56,8 +58,21 @@
>>   #define PTE_SMALL       BIT(10, UL)
>>   #define PTE_POPULATE    BIT(11, UL)
>>   
>> +/*
>> + * [62:61] Svpbmt Memory Type definitions:
>> + *
>> + *  00 - PMA    Normal Cacheable, No change to implied PMA memory type
>> + *  01 - NC     Non-cacheable, idempotent, weakly-ordered Main Memory
>> + *  10 - IO     Non-cacheable, non-idempotent, strongly-ordered I/O memory
>> + *  11 - Rsvd   Reserved for future standard use
>> + */
>> +#define PTE_PMBT_NOCACHE    BIT(61, UL)
>> +#define PTE_PMBT_IO         BIT(62, UL)
> Unlike PTE_SMALL and PTE_POPULATE these are arch-defined; I think they
> want to move up to where the other arch-defined bits are, thus also
> maping them appear before their first use.

Sure, I'll move them up.

Thanks.

~ Oleksii