The Svpbmt extension is necessary for changing the memory type of a page. The
memory type contains a combination of attributes that indicate the
cacheability, idempotency, and ordering properties for access to that page.
As a part of the patch the following is introduced:
- Svpbmt memory type definitions: PTE_PBMT_{NOCACHE,IO}.
- PAGE_HYPERVISOR_{NOCACHE,WC}.
- RISCV_ISA_EXT_svpbmt, plus a runtime check that Svpbmt is supported by
the platform.
- Update riscv/booting.txt with information about Svpbmt.
- Update logic of pt_update_entry() to take into account PBMT bits.
Use 'unsigned long' for pte_attr_t as the PBMT bits are 61 and 62 and they
don't fit into 'unsigned int'. Also, update function prototypes which use
'unsigned int' for flags/attributes.
Enable Svpbmt for testing in QEMU as Svpbmt is now mandatory for
Xen to work.
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in v2:
- new patch.
---
automation/scripts/qemu-smoke-riscv64.sh | 1 +
docs/misc/riscv/booting.txt | 4 ++++
xen/arch/riscv/Kconfig | 14 ++++++++++++++
xen/arch/riscv/cpufeature.c | 2 ++
xen/arch/riscv/include/asm/cpufeature.h | 1 +
xen/arch/riscv/include/asm/fixmap.h | 2 +-
xen/arch/riscv/include/asm/mm-types.h | 8 ++++++++
xen/arch/riscv/include/asm/page.h | 17 ++++++++++++++++-
xen/arch/riscv/pt.c | 20 +++++++++++---------
9 files changed, 58 insertions(+), 11 deletions(-)
create mode 100644 xen/arch/riscv/include/asm/mm-types.h
diff --git a/automation/scripts/qemu-smoke-riscv64.sh b/automation/scripts/qemu-smoke-riscv64.sh
index b2e112c942..25f9e4190e 100755
--- a/automation/scripts/qemu-smoke-riscv64.sh
+++ b/automation/scripts/qemu-smoke-riscv64.sh
@@ -7,6 +7,7 @@ rm -f smoke.serial
export TEST_CMD="qemu-system-riscv64 \
-M virt,aia=aplic-imsic \
+ -cpu rv64,svpbmt=on \
-smp 1 \
-nographic \
-m 2g \
diff --git a/docs/misc/riscv/booting.txt b/docs/misc/riscv/booting.txt
index 3a8474a27d..e100bde575 100644
--- a/docs/misc/riscv/booting.txt
+++ b/docs/misc/riscv/booting.txt
@@ -18,3 +18,7 @@ Xen is run:
- Zihintpause:
On a system that doesn't have this extension, cpu_relax() should be
implemented properly.
+- SVPBMT is mandatory to enable changing the memory attributes of a page.
+ For platforms that do not support SVPBMT, it is necessary to introduce a
+ similar mechanism as described in:
+ https://lore.kernel.org/all/20241102000843.1301099-1-samuel.holland@sifive.com/
diff --git a/xen/arch/riscv/Kconfig b/xen/arch/riscv/Kconfig
index d882e0a059..60520dab57 100644
--- a/xen/arch/riscv/Kconfig
+++ b/xen/arch/riscv/Kconfig
@@ -10,11 +10,25 @@ config RISCV
config RISCV_64
def_bool y
select 64BIT
+ select HAS_SVPBMT
config ARCH_DEFCONFIG
string
default "arch/riscv/configs/tiny64_defconfig"
+config HAS_SVPBMT
+ bool
+ depends on RISCV_64
+ help
+ This config enables usage of Svpbmt ISA-extension ( Supervisor-mode:
+ page-based memory types).
+
+ The memory type for a page contains a combination of attributes
+ that indicate the cacheability, idempotency, and ordering
+ properties for access to that page.
+
+ The Svpbmt extension is only available on 64-bit cpus.
+
menu "Architecture Features"
source "arch/Kconfig"
diff --git a/xen/arch/riscv/cpufeature.c b/xen/arch/riscv/cpufeature.c
index 3246a03624..b7d5ec6580 100644
--- a/xen/arch/riscv/cpufeature.c
+++ b/xen/arch/riscv/cpufeature.c
@@ -137,6 +137,7 @@ const struct riscv_isa_ext_data __initconst riscv_isa_ext[] = {
RISCV_ISA_EXT_DATA(zbs),
RISCV_ISA_EXT_DATA(smaia),
RISCV_ISA_EXT_DATA(ssaia),
+ RISCV_ISA_EXT_DATA(svpbmt),
};
static const struct riscv_isa_ext_data __initconst required_extensions[] = {
@@ -151,6 +152,7 @@ static const struct riscv_isa_ext_data __initconst required_extensions[] = {
RISCV_ISA_EXT_DATA(zifencei),
RISCV_ISA_EXT_DATA(zihintpause),
RISCV_ISA_EXT_DATA(zbb),
+ RISCV_ISA_EXT_DATA(svpbmt),
};
static bool __init is_lowercase_extension_name(const char *str)
diff --git a/xen/arch/riscv/include/asm/cpufeature.h b/xen/arch/riscv/include/asm/cpufeature.h
index 1015b6ee44..768b84b769 100644
--- a/xen/arch/riscv/include/asm/cpufeature.h
+++ b/xen/arch/riscv/include/asm/cpufeature.h
@@ -37,6 +37,7 @@ enum riscv_isa_ext_id {
RISCV_ISA_EXT_zbs,
RISCV_ISA_EXT_smaia,
RISCV_ISA_EXT_ssaia,
+ RISCV_ISA_EXT_svpbmt,
RISCV_ISA_EXT_MAX
};
diff --git a/xen/arch/riscv/include/asm/fixmap.h b/xen/arch/riscv/include/asm/fixmap.h
index e399a15f53..5990c964aa 100644
--- a/xen/arch/riscv/include/asm/fixmap.h
+++ b/xen/arch/riscv/include/asm/fixmap.h
@@ -33,7 +33,7 @@
extern pte_t xen_fixmap[];
/* Map a page in a fixmap entry */
-void set_fixmap(unsigned int map, mfn_t mfn, unsigned int flags);
+void set_fixmap(unsigned int map, mfn_t mfn, pte_attr_t flags);
/* Remove a mapping from a fixmap entry */
void clear_fixmap(unsigned int map);
diff --git a/xen/arch/riscv/include/asm/mm-types.h b/xen/arch/riscv/include/asm/mm-types.h
new file mode 100644
index 0000000000..fa512064b8
--- /dev/null
+++ b/xen/arch/riscv/include/asm/mm-types.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef ASM_RISCV_MM_TYPES_H
+#define ASM_RISCV_MM_TYPES_H
+
+typedef unsigned long pte_attr_t;
+
+#endif /* ASM_RISCV_MM_TYPES_H */
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index bf8988f657..2af4823170 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -46,6 +46,8 @@
#define PAGE_HYPERVISOR_RX (PTE_VALID | PTE_READABLE | PTE_EXECUTABLE)
#define PAGE_HYPERVISOR PAGE_HYPERVISOR_RW
+#define PAGE_HYPERVISOR_NOCACHE (PAGE_HYPERVISOR_RW | PTE_PMBT_IO)
+#define PAGE_HYPERVISOR_WC (PAGE_HYPERVISOR_RW | PTE_PMBT_NOCACHE)
/*
* The PTE format does not contain the following bits within itself;
@@ -56,8 +58,21 @@
#define PTE_SMALL BIT(10, UL)
#define PTE_POPULATE BIT(11, UL)
+/*
+ * [62:61] Svpbmt Memory Type definitions:
+ *
+ * 00 - PMA Normal Cacheable, No change to implied PMA memory type
+ * 01 - NC Non-cacheable, idempotent, weakly-ordered Main Memory
+ * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory
+ * 11 - Rsvd Reserved for future standard use
+ */
+#define PTE_PMBT_NOCACHE BIT(61, UL)
+#define PTE_PMBT_IO BIT(62, UL)
+
#define PTE_ACCESS_MASK (PTE_READABLE | PTE_WRITABLE | PTE_EXECUTABLE)
+#define PTE_PBMT_MASK (PTE_PMBT_NOCACHE | PTE_PMBT_IO)
+
/* Calculate the offsets into the pagetables for a given VA */
#define pt_linear_offset(lvl, va) ((va) >> XEN_PT_LEVEL_SHIFT(lvl))
@@ -202,7 +217,7 @@ static inline pte_t read_pte(const pte_t *p)
return read_atomic(p);
}
-static inline pte_t pte_from_mfn(mfn_t mfn, unsigned int flags)
+static inline pte_t pte_from_mfn(mfn_t mfn, pte_attr_t flags)
{
unsigned long pte = (mfn_x(mfn) << PTE_PPN_SHIFT) | flags;
return (pte_t){ .pte = pte };
diff --git a/xen/arch/riscv/pt.c b/xen/arch/riscv/pt.c
index 918b1b91ab..82c8c73c3e 100644
--- a/xen/arch/riscv/pt.c
+++ b/xen/arch/riscv/pt.c
@@ -25,7 +25,7 @@ static inline mfn_t get_root_page(void)
* See the comment about the possible combination of (mfn, flags) in
* the comment above pt_update().
*/
-static bool pt_check_entry(pte_t entry, mfn_t mfn, unsigned int flags)
+static bool pt_check_entry(pte_t entry, mfn_t mfn, pte_attr_t flags)
{
/* Sanity check when modifying an entry. */
if ( (flags & PTE_VALID) && mfn_eq(mfn, INVALID_MFN) )
@@ -260,7 +260,7 @@ pte_t pt_walk(vaddr_t va, unsigned int *pte_level)
*/
static int pt_update_entry(mfn_t root, vaddr_t virt,
mfn_t mfn, unsigned int *target,
- unsigned int flags)
+ pte_attr_t flags)
{
int rc;
/*
@@ -328,17 +328,19 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
pte.pte = 0;
else
{
+ const pte_attr_t attrs = PTE_ACCESS_MASK | PTE_PBMT_MASK;
+
/* We are inserting a mapping => Create new pte. */
if ( !mfn_eq(mfn, INVALID_MFN) )
pte = pte_from_mfn(mfn, PTE_VALID);
- else /* We are updating the permission => Copy the current pte. */
+ else /* We are updating the attributes => Copy the current pte. */
{
pte = *ptep;
- pte.pte &= ~PTE_ACCESS_MASK;
+ pte.pte &= ~attrs;
}
- /* update permission according to the flags */
- pte.pte |= (flags & PTE_ACCESS_MASK) | PTE_ACCESSED | PTE_DIRTY;
+ /* Update attributes of PTE according to the flags. */
+ pte.pte |= (flags & attrs) | PTE_ACCESSED | PTE_DIRTY;
}
write_pte(ptep, pte);
@@ -353,7 +355,7 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
/* Return the level where mapping should be done */
static int pt_mapping_level(unsigned long vfn, mfn_t mfn, unsigned long nr,
- unsigned int flags)
+ pte_attr_t flags)
{
unsigned int level = 0;
unsigned long mask;
@@ -407,7 +409,7 @@ static DEFINE_SPINLOCK(pt_lock);
* inserting will be done.
*/
static int pt_update(vaddr_t virt, mfn_t mfn,
- unsigned long nr_mfns, unsigned int flags)
+ unsigned long nr_mfns, pte_attr_t flags)
{
int rc = 0;
unsigned long vfn = PFN_DOWN(virt);
@@ -535,7 +537,7 @@ int __init populate_pt_range(unsigned long virt, unsigned long nr_mfns)
}
/* Map a 4k page in a fixmap entry */
-void set_fixmap(unsigned int map, mfn_t mfn, unsigned int flags)
+void set_fixmap(unsigned int map, mfn_t mfn, pte_attr_t flags)
{
if ( map_pages_to_xen(FIXMAP_ADDR(map), mfn, 1, flags | PTE_SMALL) != 0 )
BUG();
--
2.49.0
On 06.05.2025 18:51, Oleksii Kurochko wrote: > Svpbmt extension is necessary for chaning the memory type for a page contains > a combination of attributes that indicate the cacheability, idempotency, > and ordering properties for access to that page. The title suggest use of the extension is optional. > --- a/xen/arch/riscv/Kconfig > +++ b/xen/arch/riscv/Kconfig > @@ -10,11 +10,25 @@ config RISCV > config RISCV_64 > def_bool y > select 64BIT > + select HAS_SVPBMT Such redundant ... > config ARCH_DEFCONFIG > string > default "arch/riscv/configs/tiny64_defconfig" > > +config HAS_SVPBMT > + bool > + depends on RISCV_64 ... dependencies are frowned upon, afaik. And it's pretty certainly not needed here. > + help > + This config enables usage of Svpbmt ISA-extension ( Supervisor-mode: > + page-based memory types). > + > + The memory type for a page contains a combination of attributes > + that indicate the cacheability, idempotency, and ordering > + properties for access to that page. > + > + The Svpbmt extension is only available on 64-bit cpus. I don't mind the help text, but for a prompt-less option it's of little use (beyond what a comment could also achieve). > --- a/xen/arch/riscv/include/asm/page.h > +++ b/xen/arch/riscv/include/asm/page.h > @@ -46,6 +46,8 @@ > #define PAGE_HYPERVISOR_RX (PTE_VALID | PTE_READABLE | PTE_EXECUTABLE) > > #define PAGE_HYPERVISOR PAGE_HYPERVISOR_RW > +#define PAGE_HYPERVISOR_NOCACHE (PAGE_HYPERVISOR_RW | PTE_PMBT_IO) > +#define PAGE_HYPERVISOR_WC (PAGE_HYPERVISOR_RW | PTE_PMBT_NOCACHE) Hmm, odd - NOCACHE doesn't really mean "no cache" then? I think this would require a comment then. 
> @@ -56,8 +58,21 @@ > #define PTE_SMALL BIT(10, UL) > #define PTE_POPULATE BIT(11, UL) > > +/* > + * [62:61] Svpbmt Memory Type definitions: > + * > + * 00 - PMA Normal Cacheable, No change to implied PMA memory type > + * 01 - NC Non-cacheable, idempotent, weakly-ordered Main Memory > + * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory > + * 11 - Rsvd Reserved for future standard use > + */ > +#define PTE_PMBT_NOCACHE BIT(61, UL) > +#define PTE_PMBT_IO BIT(62, UL) Unlike PTE_SMALL and PTE_POPULATE these are arch-defined; I think they want to move up to where the other arch-defined bits are, thus also maping them appear before their first use. Jan
On 5/13/25 6:00 PM, Jan Beulich wrote: > On 06.05.2025 18:51, Oleksii Kurochko wrote: >> Svpbmt extension is necessary for chaning the memory type for a page contains >> a combination of attributes that indicate the cacheability, idempotency, >> and ordering properties for access to that page. > The title suggest use of the extension is optional. > >> --- a/xen/arch/riscv/Kconfig >> +++ b/xen/arch/riscv/Kconfig >> @@ -10,11 +10,25 @@ config RISCV >> config RISCV_64 >> def_bool y >> select 64BIT >> + select HAS_SVPBMT > Such redundant ... > >> config ARCH_DEFCONFIG >> string >> default "arch/riscv/configs/tiny64_defconfig" >> >> +config HAS_SVPBMT >> + bool >> + depends on RISCV_64 > ... dependencies are frowned upon, afaik. And it's pretty certainly not > needed here. > >> + help >> + This config enables usage of Svpbmt ISA-extension ( Supervisor-mode: >> + page-based memory types). >> + >> + The memory type for a page contains a combination of attributes >> + that indicate the cacheability, idempotency, and ordering >> + properties for access to that page. >> + >> + The Svpbmt extension is only available on 64-bit cpus. > I don't mind the help text, but for a prompt-less option it's of little > use (beyond what a comment could also achieve). I'll drop "depends on RISCV_64" for HAS_SVOBMT and leave only 'select HAS_SVPBMT' and move the help text to commit messaage. > >> --- a/xen/arch/riscv/include/asm/page.h >> +++ b/xen/arch/riscv/include/asm/page.h >> @@ -46,6 +46,8 @@ >> #define PAGE_HYPERVISOR_RX (PTE_VALID | PTE_READABLE | PTE_EXECUTABLE) >> >> #define PAGE_HYPERVISOR PAGE_HYPERVISOR_RW >> +#define PAGE_HYPERVISOR_NOCACHE (PAGE_HYPERVISOR_RW | PTE_PMBT_IO) >> +#define PAGE_HYPERVISOR_WC (PAGE_HYPERVISOR_RW | PTE_PMBT_NOCACHE) > Hmm, odd - NOCACHE doesn't really mean "no cache" then? I think this > would require a comment then. 
According to the table (Svpbmt Memory Type definitions) in the comment below these definitions both of them (PTE_PMBT_IO and PTE_PMBT_NOCACHE) are non-cachable. I wasn't sure what and for what should be used so I did in sync with Arm which defines "#define PAGE_HYPERVISOR_NOCACHE (_PAGE_DEVICE|MT_DEVICE_nGnRE)" where MT_DEVICE_nGnRE is equivalent of PTE_PMBT_IO. I can add the comment above definition of PAGE_HYPERVISOR_NOCACHE: /* Non-cacheable, non-idempotent, strongly-ordered I/O memory */. Something similar then I can add for PAGE_HYPERVISOR_WC: /* Non-cacheable, idempotent, weakly-ordered Main Memory */ > >> @@ -56,8 +58,21 @@ >> #define PTE_SMALL BIT(10, UL) >> #define PTE_POPULATE BIT(11, UL) >> >> +/* >> + * [62:61] Svpbmt Memory Type definitions: >> + * >> + * 00 - PMA Normal Cacheable, No change to implied PMA memory type >> + * 01 - NC Non-cacheable, idempotent, weakly-ordered Main Memory >> + * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory >> + * 11 - Rsvd Reserved for future standard use >> + */ >> +#define PTE_PMBT_NOCACHE BIT(61, UL) >> +#define PTE_PMBT_IO BIT(62, UL) > Unlike PTE_SMALL and PTE_POPULATE these are arch-defined; I think they > want to move up to where the other arch-defined bits are, thus also > maping them appear before their first use. Sure, I'll move them up. Thanks. ~ Oleksii
© 2016 - 2025 Red Hat, Inc.