Svpbmt extension is necessary for chaning the memory type for a page contains
a combination of attributes that indicate the cacheability, idempotency,
and ordering properties for access to that page.
As a part of the patch the following is introduced:
- Svpbmt memory type defintions: PTE_PBMT_{NOCACHE,IO}.
- PAGE_HYPERVISOR_{NOCACHE,WC}.
- RISCV_ISA_EXT_svpbmt and add a check in runtime that Svpbmt is
supported by platform.
- Update riscv/booting.txt with information about Svpbmt.
- Update logic of pt_update_entry() to take into account PBMT bits.
Use 'unsigned long' for pte_attr_t as PMBT bits are 61 and 62 and it doesn't
fit into 'unsigned int'. Also, update function prototypes which uses
'unsigned int' for flags/attibutes.
Enable Svpbmt for testing in QEMU as Svpmbt is now mandatory for
Xen work.
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in v3:
- Remove dependecy "depends on RISCV_64" for HAS_SVPBMT as it is selected by
CONFIG_RISCV_64.
- Move HAS_SVPMBT's help text to commit message as it's not too much sense
to have it for a prompt-less option.
- Move definition of PTE_PMBT_{NOCACHE,IO} up closer to arch-specific
definitions.
- Update the commit message and subject.
- Add a comment above PAGE_HYPERVISOR_NOCACHE.
---
Changes in v2:
- new patch.
---
automation/scripts/qemu-smoke-riscv64.sh | 1 +
docs/misc/riscv/booting.txt | 4 ++++
xen/arch/riscv/Kconfig | 4 ++++
xen/arch/riscv/cpufeature.c | 2 ++
xen/arch/riscv/include/asm/cpufeature.h | 1 +
xen/arch/riscv/include/asm/fixmap.h | 2 +-
xen/arch/riscv/include/asm/mm-types.h | 8 ++++++++
xen/arch/riscv/include/asm/page.h | 23 ++++++++++++++++++++++-
xen/arch/riscv/pt.c | 20 +++++++++++---------
9 files changed, 54 insertions(+), 11 deletions(-)
create mode 100644 xen/arch/riscv/include/asm/mm-types.h
diff --git a/automation/scripts/qemu-smoke-riscv64.sh b/automation/scripts/qemu-smoke-riscv64.sh
index b2e112c942..25f9e4190e 100755
--- a/automation/scripts/qemu-smoke-riscv64.sh
+++ b/automation/scripts/qemu-smoke-riscv64.sh
@@ -7,6 +7,7 @@ rm -f smoke.serial
export TEST_CMD="qemu-system-riscv64 \
-M virt,aia=aplic-imsic \
+ -cpu rv64,svpbmt=on \
-smp 1 \
-nographic \
-m 2g \
diff --git a/docs/misc/riscv/booting.txt b/docs/misc/riscv/booting.txt
index 3a8474a27d..e100bde575 100644
--- a/docs/misc/riscv/booting.txt
+++ b/docs/misc/riscv/booting.txt
@@ -18,3 +18,7 @@ Xen is run:
- Zihintpause:
On a system that doesn't have this extension, cpu_relax() should be
implemented properly.
+- SVPBMT is mandatory to enable changing the memory attributes of a page.
+ For platforms that do not support SVPBMT, it is necessary to introduce a
+ similar mechanism as described in:
+ https://lore.kernel.org/all/20241102000843.1301099-1-samuel.holland@sifive.com/
diff --git a/xen/arch/riscv/Kconfig b/xen/arch/riscv/Kconfig
index d882e0a059..62c5b7ba34 100644
--- a/xen/arch/riscv/Kconfig
+++ b/xen/arch/riscv/Kconfig
@@ -10,11 +10,15 @@ config RISCV
config RISCV_64
def_bool y
select 64BIT
+ select HAS_SVPBMT
config ARCH_DEFCONFIG
string
default "arch/riscv/configs/tiny64_defconfig"
+config HAS_SVPBMT
+ bool
+
menu "Architecture Features"
source "arch/Kconfig"
diff --git a/xen/arch/riscv/cpufeature.c b/xen/arch/riscv/cpufeature.c
index 3246a03624..b7d5ec6580 100644
--- a/xen/arch/riscv/cpufeature.c
+++ b/xen/arch/riscv/cpufeature.c
@@ -137,6 +137,7 @@ const struct riscv_isa_ext_data __initconst riscv_isa_ext[] = {
RISCV_ISA_EXT_DATA(zbs),
RISCV_ISA_EXT_DATA(smaia),
RISCV_ISA_EXT_DATA(ssaia),
+ RISCV_ISA_EXT_DATA(svpbmt),
};
static const struct riscv_isa_ext_data __initconst required_extensions[] = {
@@ -151,6 +152,7 @@ static const struct riscv_isa_ext_data __initconst required_extensions[] = {
RISCV_ISA_EXT_DATA(zifencei),
RISCV_ISA_EXT_DATA(zihintpause),
RISCV_ISA_EXT_DATA(zbb),
+ RISCV_ISA_EXT_DATA(svpbmt),
};
static bool __init is_lowercase_extension_name(const char *str)
diff --git a/xen/arch/riscv/include/asm/cpufeature.h b/xen/arch/riscv/include/asm/cpufeature.h
index 1015b6ee44..768b84b769 100644
--- a/xen/arch/riscv/include/asm/cpufeature.h
+++ b/xen/arch/riscv/include/asm/cpufeature.h
@@ -37,6 +37,7 @@ enum riscv_isa_ext_id {
RISCV_ISA_EXT_zbs,
RISCV_ISA_EXT_smaia,
RISCV_ISA_EXT_ssaia,
+ RISCV_ISA_EXT_svpbmt,
RISCV_ISA_EXT_MAX
};
diff --git a/xen/arch/riscv/include/asm/fixmap.h b/xen/arch/riscv/include/asm/fixmap.h
index e399a15f53..5990c964aa 100644
--- a/xen/arch/riscv/include/asm/fixmap.h
+++ b/xen/arch/riscv/include/asm/fixmap.h
@@ -33,7 +33,7 @@
extern pte_t xen_fixmap[];
/* Map a page in a fixmap entry */
-void set_fixmap(unsigned int map, mfn_t mfn, unsigned int flags);
+void set_fixmap(unsigned int map, mfn_t mfn, pte_attr_t flags);
/* Remove a mapping from a fixmap entry */
void clear_fixmap(unsigned int map);
diff --git a/xen/arch/riscv/include/asm/mm-types.h b/xen/arch/riscv/include/asm/mm-types.h
new file mode 100644
index 0000000000..fa512064b8
--- /dev/null
+++ b/xen/arch/riscv/include/asm/mm-types.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef ASM_RISCV_MM_TYPES_H
+#define ASM_RISCV_MM_TYPES_H
+
+typedef unsigned long pte_attr_t;
+
+#endif /* ASM_RISCV_MM_TYPES_H */
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index bf8988f657..81b91b63d8 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -37,6 +37,16 @@
#define PTE_ACCESSED BIT(6, UL)
#define PTE_DIRTY BIT(7, UL)
#define PTE_RSW (BIT(8, UL) | BIT(9, UL))
+/*
+ * [62:61] Svpbmt Memory Type definitions:
+ *
+ * 00 - PMA Normal Cacheable, No change to implied PMA memory type
+ * 01 - NC Non-cacheable, idempotent, weakly-ordered Main Memory
+ * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory
+ * 11 - Rsvd Reserved for future standard use
+ */
+#define PTE_PMBT_NOCACHE BIT(61, UL)
+#define PTE_PMBT_IO BIT(62, UL)
#define PTE_LEAF_DEFAULT (PTE_VALID | PTE_READABLE | PTE_WRITABLE)
#define PTE_TABLE (PTE_VALID)
@@ -46,6 +56,15 @@
#define PAGE_HYPERVISOR_RX (PTE_VALID | PTE_READABLE | PTE_EXECUTABLE)
#define PAGE_HYPERVISOR PAGE_HYPERVISOR_RW
+/*
+ * PAGE_HYPERVISOR_NOCACHE is used for ioremap().
+ *
+ * Both PTE_PMBT_IO and PTE_PMBT_NOCACHE are non-cacheable, but the difference
+ * is that IO is non-idempotent and strongly ordered, which makes it a good
+ * candidate for mapping IOMEM.
+ */
+#define PAGE_HYPERVISOR_NOCACHE (PAGE_HYPERVISOR_RW | PTE_PMBT_IO)
+#define PAGE_HYPERVISOR_WC (PAGE_HYPERVISOR_RW | PTE_PMBT_NOCACHE)
/*
* The PTE format does not contain the following bits within itself;
@@ -58,6 +77,8 @@
#define PTE_ACCESS_MASK (PTE_READABLE | PTE_WRITABLE | PTE_EXECUTABLE)
+#define PTE_PBMT_MASK (PTE_PMBT_NOCACHE | PTE_PMBT_IO)
+
/* Calculate the offsets into the pagetables for a given VA */
#define pt_linear_offset(lvl, va) ((va) >> XEN_PT_LEVEL_SHIFT(lvl))
@@ -202,7 +223,7 @@ static inline pte_t read_pte(const pte_t *p)
return read_atomic(p);
}
-static inline pte_t pte_from_mfn(mfn_t mfn, unsigned int flags)
+static inline pte_t pte_from_mfn(mfn_t mfn, pte_attr_t flags)
{
unsigned long pte = (mfn_x(mfn) << PTE_PPN_SHIFT) | flags;
return (pte_t){ .pte = pte };
diff --git a/xen/arch/riscv/pt.c b/xen/arch/riscv/pt.c
index 918b1b91ab..82c8c73c3e 100644
--- a/xen/arch/riscv/pt.c
+++ b/xen/arch/riscv/pt.c
@@ -25,7 +25,7 @@ static inline mfn_t get_root_page(void)
* See the comment about the possible combination of (mfn, flags) in
* the comment above pt_update().
*/
-static bool pt_check_entry(pte_t entry, mfn_t mfn, unsigned int flags)
+static bool pt_check_entry(pte_t entry, mfn_t mfn, pte_attr_t flags)
{
/* Sanity check when modifying an entry. */
if ( (flags & PTE_VALID) && mfn_eq(mfn, INVALID_MFN) )
@@ -260,7 +260,7 @@ pte_t pt_walk(vaddr_t va, unsigned int *pte_level)
*/
static int pt_update_entry(mfn_t root, vaddr_t virt,
mfn_t mfn, unsigned int *target,
- unsigned int flags)
+ pte_attr_t flags)
{
int rc;
/*
@@ -328,17 +328,19 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
pte.pte = 0;
else
{
+ const pte_attr_t attrs = PTE_ACCESS_MASK | PTE_PBMT_MASK;
+
/* We are inserting a mapping => Create new pte. */
if ( !mfn_eq(mfn, INVALID_MFN) )
pte = pte_from_mfn(mfn, PTE_VALID);
- else /* We are updating the permission => Copy the current pte. */
+ else /* We are updating the attributes => Copy the current pte. */
{
pte = *ptep;
- pte.pte &= ~PTE_ACCESS_MASK;
+ pte.pte &= ~attrs;
}
- /* update permission according to the flags */
- pte.pte |= (flags & PTE_ACCESS_MASK) | PTE_ACCESSED | PTE_DIRTY;
+ /* Update attributes of PTE according to the flags. */
+ pte.pte |= (flags & attrs) | PTE_ACCESSED | PTE_DIRTY;
}
write_pte(ptep, pte);
@@ -353,7 +355,7 @@ static int pt_update_entry(mfn_t root, vaddr_t virt,
/* Return the level where mapping should be done */
static int pt_mapping_level(unsigned long vfn, mfn_t mfn, unsigned long nr,
- unsigned int flags)
+ pte_attr_t flags)
{
unsigned int level = 0;
unsigned long mask;
@@ -407,7 +409,7 @@ static DEFINE_SPINLOCK(pt_lock);
* inserting will be done.
*/
static int pt_update(vaddr_t virt, mfn_t mfn,
- unsigned long nr_mfns, unsigned int flags)
+ unsigned long nr_mfns, pte_attr_t flags)
{
int rc = 0;
unsigned long vfn = PFN_DOWN(virt);
@@ -535,7 +537,7 @@ int __init populate_pt_range(unsigned long virt, unsigned long nr_mfns)
}
/* Map a 4k page in a fixmap entry */
-void set_fixmap(unsigned int map, mfn_t mfn, unsigned int flags)
+void set_fixmap(unsigned int map, mfn_t mfn, pte_attr_t flags)
{
if ( map_pages_to_xen(FIXMAP_ADDR(map), mfn, 1, flags | PTE_SMALL) != 0 )
BUG();
--
2.49.0
On 21.05.2025 18:03, Oleksii Kurochko wrote:
> Svpbmt extension is necessary for chaning the memory type for a page contains
> a combination of attributes that indicate the cacheability, idempotency,
> and ordering properties for access to that page.
>
> As a part of the patch the following is introduced:
> - Svpbmt memory type defintions: PTE_PBMT_{NOCACHE,IO}.
> - PAGE_HYPERVISOR_{NOCACHE,WC}.
> - RISCV_ISA_EXT_svpbmt and add a check in runtime that Svpbmt is
> supported by platform.
> - Update riscv/booting.txt with information about Svpbmt.
> - Update logic of pt_update_entry() to take into account PBMT bits.
>
> Use 'unsigned long' for pte_attr_t as PMBT bits are 61 and 62 and it doesn't
> fit into 'unsigned int'. Also, update function prototypes which uses
> 'unsigned int' for flags/attibutes.
>
> Enable Svpbmt for testing in QEMU as Svpmbt is now mandatory for
> Xen work.
>
> Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
On 5/22/25 9:26 AM, Jan Beulich wrote:
> On 21.05.2025 18:03, Oleksii Kurochko wrote:
>> Svpbmt extension is necessary for chaning the memory type for a page contains
>> a combination of attributes that indicate the cacheability, idempotency,
>> and ordering properties for access to that page.
>>
>> As a part of the patch the following is introduced:
>> - Svpbmt memory type defintions: PTE_PBMT_{NOCACHE,IO}.
>> - PAGE_HYPERVISOR_{NOCACHE,WC}.
>> - RISCV_ISA_EXT_svpbmt and add a check in runtime that Svpbmt is
>> supported by platform.
>> - Update riscv/booting.txt with information about Svpbmt.
>> - Update logic of pt_update_entry() to take into account PBMT bits.
>>
>> Use 'unsigned long' for pte_attr_t as PMBT bits are 61 and 62 and it doesn't
>> fit into 'unsigned int'. Also, update function prototypes which uses
>> 'unsigned int' for flags/attibutes.
>>
>> Enable Svpbmt for testing in QEMU as Svpmbt is now mandatory for
>> Xen work.
>>
>> Signed-off-by: Oleksii Kurochko<oleksii.kurochko@gmail.com>
> Acked-by: Jan Beulich<jbeulich@suse.com>
>
Thanks.
I just noticed a minor typo (PMBT_{IO,NOCACHE}->PMBT_{IO,NOCACHE} in the changes:
xen$ git diff
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index 81b91b63d8..4cb0179648 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -45,8 +45,8 @@
* 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory
* 11 - Rsvd Reserved for future standard use
*/
-#define PTE_PMBT_NOCACHE BIT(61, UL)
-#define PTE_PMBT_IO BIT(62, UL)
+#define PTE_PBMT_NOCACHE BIT(61, UL)
+#define PTE_PBMT_IO BIT(62, UL)
#define PTE_LEAF_DEFAULT (PTE_VALID | PTE_READABLE | PTE_WRITABLE)
#define PTE_TABLE (PTE_VALID)
@@ -59,12 +59,12 @@
/*
* PAGE_HYPERVISOR_NOCACHE is used for ioremap().
*
- * Both PTE_PMBT_IO and PTE_PMBT_NOCACHE are non-cacheable, but the difference
+ * Both PTE_PBMT_IO and PTE_PBMT_NOCACHE are non-cacheable, but the difference
* is that IO is non-idempotent and strongly ordered, which makes it a good
* candidate for mapping IOMEM.
*/
-#define PAGE_HYPERVISOR_NOCACHE (PAGE_HYPERVISOR_RW | PTE_PMBT_IO)
-#define PAGE_HYPERVISOR_WC (PAGE_HYPERVISOR_RW | PTE_PMBT_NOCACHE)
+#define PAGE_HYPERVISOR_NOCACHE (PAGE_HYPERVISOR_RW | PTE_PBMT_IO)
+#define PAGE_HYPERVISOR_WC (PAGE_HYPERVISOR_RW | PTE_PBMT_NOCACHE)
/*
* The PTE format does not contain the following bits within itself;
@@ -77,7 +77,7 @@
#define PTE_ACCESS_MASK (PTE_READABLE | PTE_WRITABLE | PTE_EXECUTABLE)
-#define PTE_PBMT_MASK (PTE_PMBT_NOCACHE | PTE_PMBT_IO)
+#define PTE_PBMT_MASK (PTE_PBMT_NOCACHE | PTE_PBMT_IO)
/* Calculate the offsets into the pagetables for a given VA */
#define pt_linear_offset(lvl, va) ((va) >> XEN_PT_LEVEL_SHIFT(lvl))
I will send them as a part of v4. (if it can't
be done during commit)
~ Oleksii
© 2016 - 2025 Red Hat, Inc.