Introduce gstage_mode_detect() to probe supported G-stage paging
modes at boot. The function iterates over possible HGATP modes
(Sv32x4 on RV32, Sv39x4/Sv48x4/Sv57x4 on RV64) and selects the
first valid one by programming CSR_HGATP and reading it back.
The selected mode is stored in gstage_mode (marked __ro_after_init)
and reported via printk. If no supported mode is found, Xen panics
since Bare mode is not expected to be used.
Finally, CSR_HGATP is cleared and a local_hfence_gvma_all() is issued
to avoid any potential speculative pollution of the TLB, as required
by the RISC-V spec.
The following issue starts to occur:
./<riscv>/asm/flushtlb.h:37:55: error: 'struct page_info' declared inside
parameter list will not be visible outside of this definition or
declaration [-Werror]
37 | static inline void page_set_tlbflush_timestamp(struct page_info *page)
To resolve it, forward declaration of struct page_info is added to
<asm/flushtlb.h.
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V5:
- New patch.
---
xen/arch/riscv/Makefile | 1 +
xen/arch/riscv/include/asm/flushtlb.h | 7 ++
xen/arch/riscv/include/asm/p2m.h | 4 +
xen/arch/riscv/include/asm/riscv_encoding.h | 5 ++
xen/arch/riscv/p2m.c | 91 +++++++++++++++++++++
xen/arch/riscv/setup.c | 3 +
6 files changed, 111 insertions(+)
create mode 100644 xen/arch/riscv/p2m.c
diff --git a/xen/arch/riscv/Makefile b/xen/arch/riscv/Makefile
index e2b8aa42c8..264e265699 100644
--- a/xen/arch/riscv/Makefile
+++ b/xen/arch/riscv/Makefile
@@ -7,6 +7,7 @@ obj-y += intc.o
obj-y += irq.o
obj-y += mm.o
obj-y += pt.o
+obj-y += p2m.o
obj-$(CONFIG_RISCV_64) += riscv64/
obj-y += sbi.o
obj-y += setup.o
diff --git a/xen/arch/riscv/include/asm/flushtlb.h b/xen/arch/riscv/include/asm/flushtlb.h
index 51c8f753c5..e70badae0c 100644
--- a/xen/arch/riscv/include/asm/flushtlb.h
+++ b/xen/arch/riscv/include/asm/flushtlb.h
@@ -7,6 +7,13 @@
#include <asm/sbi.h>
+struct page_info;
+
+static inline void local_hfence_gvma_all(void)
+{
+ asm volatile ( "hfence.gvma zero, zero" ::: "memory" );
+}
+
/* Flush TLB of local processor for address va. */
static inline void flush_tlb_one_local(vaddr_t va)
{
diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h
index e43c559e0c..9d4a5d6a2e 100644
--- a/xen/arch/riscv/include/asm/p2m.h
+++ b/xen/arch/riscv/include/asm/p2m.h
@@ -6,6 +6,8 @@
#include <asm/page-bits.h>
+extern unsigned long gstage_mode;
+
#define paddr_bits PADDR_BITS
/*
@@ -88,6 +90,8 @@ static inline bool arch_acquire_resource_check(struct domain *d)
return false;
}
+void gstage_mode_detect(void);
+
#endif /* ASM__RISCV__P2M_H */
/*
diff --git a/xen/arch/riscv/include/asm/riscv_encoding.h b/xen/arch/riscv/include/asm/riscv_encoding.h
index 6cc8f4eb45..b15f5ad0b4 100644
--- a/xen/arch/riscv/include/asm/riscv_encoding.h
+++ b/xen/arch/riscv/include/asm/riscv_encoding.h
@@ -131,13 +131,16 @@
#define HGATP_MODE_SV32X4 _UL(1)
#define HGATP_MODE_SV39X4 _UL(8)
#define HGATP_MODE_SV48X4 _UL(9)
+#define HGATP_MODE_SV57X4 _UL(10)
#define HGATP32_MODE_SHIFT 31
+#define HGATP32_MODE_MASK _UL(0x80000000)
#define HGATP32_VMID_SHIFT 22
#define HGATP32_VMID_MASK _UL(0x1FC00000)
#define HGATP32_PPN _UL(0x003FFFFF)
#define HGATP64_MODE_SHIFT 60
+#define HGATP64_MODE_MASK _ULL(0xF000000000000000)
#define HGATP64_VMID_SHIFT 44
#define HGATP64_VMID_MASK _ULL(0x03FFF00000000000)
#define HGATP64_PPN _ULL(0x00000FFFFFFFFFFF)
@@ -170,6 +173,7 @@
#define HGATP_VMID_SHIFT HGATP64_VMID_SHIFT
#define HGATP_VMID_MASK HGATP64_VMID_MASK
#define HGATP_MODE_SHIFT HGATP64_MODE_SHIFT
+#define HGATP_MODE_MASK HGATP64_MODE_MASK
#else
#define MSTATUS_SD MSTATUS32_SD
#define SSTATUS_SD SSTATUS32_SD
@@ -181,6 +185,7 @@
#define HGATP_VMID_SHIFT HGATP32_VMID_SHIFT
#define HGATP_VMID_MASK HGATP32_VMID_MASK
#define HGATP_MODE_SHIFT HGATP32_MODE_SHIFT
+#define HGATP_MODE_MASK HGATP32_MODE_MASK
#endif
#define TOPI_IID_SHIFT 16
diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c
new file mode 100644
index 0000000000..56113a2f7a
--- /dev/null
+++ b/xen/arch/riscv/p2m.c
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/macros.h>
+#include <xen/sections.h>
+
+#include <asm/csr.h>
+#include <asm/flushtlb.h>
+#include <asm/riscv_encoding.h>
+
+unsigned long __ro_after_init gstage_mode;
+
+void __init gstage_mode_detect(void)
+{
+ unsigned int mode_idx;
+
+ const struct {
+ unsigned long mode;
+ unsigned int paging_levels;
+ const char *name;
+ } modes[] = {
+ /*
+ * Based on the RISC-V spec:
+ * When SXLEN=32, the only other valid setting for MODE is Sv32,
+ * a paged virtual-memory scheme described in Section 10.3.
+ * When SXLEN=64, three paged virtual-memory schemes are defined:
+ * Sv39, Sv48, and Sv57.
+ */
+#ifdef CONFIG_RISCV_32
+ { HGATP_MODE_SV32X4, 2, "Sv32x4" }
+#else
+ { HGATP_MODE_SV39X4, 3, "Sv39x4" },
+ { HGATP_MODE_SV48X4, 4, "Sv48x4" },
+ { HGATP_MODE_SV57X4, 5, "Sv57x4" },
+#endif
+ };
+
+ gstage_mode = HGATP_MODE_OFF;
+
+ for ( mode_idx = 0; mode_idx < ARRAY_SIZE(modes); mode_idx++ )
+ {
+ unsigned long mode = modes[mode_idx].mode;
+
+ csr_write(CSR_HGATP, MASK_INSR(mode, HGATP_MODE_MASK));
+
+ if ( MASK_EXTR(csr_read(CSR_HGATP), HGATP_MODE_MASK) == mode )
+ {
+ gstage_mode = mode;
+ break;
+ }
+ }
+
+ if ( gstage_mode == HGATP_MODE_OFF )
+ panic("Xen expects that G-stage won't be Bare mode\n");
+
+ printk("%s: G-stage mode is %s\n", __func__, modes[mode_idx].name);
+
+ csr_write(CSR_HGATP, 0);
+
+ /*
+ * From RISC-V spec:
+ * Speculative executions of the address-translation algorithm behave as
+ * non-speculative executions of the algorithm do, except that they must
+ * not set the dirty bit for a PTE, they must not trigger an exception,
+ * and they must not create address-translation cache entries if those
+ * entries would have been invalidated by any SFENCE.VMA instruction
+ * executed by the hart since the speculative execution of the algorithm
+ * began.
+ * The quote above mention explicitly SFENCE.VMA, but I assume it is true
+ * for HFENCE.VMA.
+ *
+ * Also, despite of the fact here it is mentioned that when V=0 two-stage
+ * address translation is inactivated:
+ * The current virtualization mode, denoted V, indicates whether the hart
+ * is currently executing in a guest. When V=1, the hart is either in
+ * virtual S-mode (VS-mode), or in virtual U-mode (VU-mode) atop a guest
+ * OS running in VS-mode. When V=0, the hart is either in M-mode, in
+ * HS-mode, or in U-mode atop an OS running in HS-mode. The
+ * virtualization mode also indicates whether two-stage address
+ * translation is active (V=1) or inactive (V=0).
+ * But on the same side, writing to hgatp register activates it:
+ * The hgatp register is considered active for the purposes of
+ * the address-translation algorithm unless the effective privilege mode
+ * is U and hstatus.HU=0.
+ *
+ * Thereby it leaves some room for speculation even in this stage of boot,
+ * so it could be that we polluted local TLB so flush all guest TLB.
+ */
+ local_hfence_gvma_all();
+}
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 483cdd7e17..87ee96bdb3 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -22,6 +22,7 @@
#include <asm/early_printk.h>
#include <asm/fixmap.h>
#include <asm/intc.h>
+#include <asm/p2m.h>
#include <asm/sbi.h>
#include <asm/setup.h>
#include <asm/traps.h>
@@ -148,6 +149,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
console_init_postirq();
+ gstage_mode_detect();
+
printk("All set up\n");
machine_halt();
--
2.51.0
On 17.09.2025 23:55, Oleksii Kurochko wrote:
> --- /dev/null
> +++ b/xen/arch/riscv/p2m.c
> @@ -0,0 +1,91 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include <xen/init.h>
> +#include <xen/lib.h>
> +#include <xen/macros.h>
> +#include <xen/sections.h>
> +
> +#include <asm/csr.h>
> +#include <asm/flushtlb.h>
> +#include <asm/riscv_encoding.h>
> +
> +unsigned long __ro_after_init gstage_mode;
> +
> +void __init gstage_mode_detect(void)
> +{
> + unsigned int mode_idx;
> +
> + const struct {
static and __initconst.
> + unsigned long mode;
Here and also for the global var: Why "long", when it's at most 4 bits?
> + unsigned int paging_levels;
> + const char *name;
More efficiently char[8]?
> + } modes[] = {
> + /*
> + * Based on the RISC-V spec:
> + * When SXLEN=32, the only other valid setting for MODE is Sv32,
The use of "other" is lacking some context here.
> + * a paged virtual-memory scheme described in Section 10.3.
Section numbers tend to change. Either to disambiguate by also spcifying
the doc version, or (preferably) you give the section title instead.
> + * When SXLEN=64, three paged virtual-memory schemes are defined:
> + * Sv39, Sv48, and Sv57.
> + */
> +#ifdef CONFIG_RISCV_32
> + { HGATP_MODE_SV32X4, 2, "Sv32x4" }
> +#else
> + { HGATP_MODE_SV39X4, 3, "Sv39x4" },
> + { HGATP_MODE_SV48X4, 4, "Sv48x4" },
> + { HGATP_MODE_SV57X4, 5, "Sv57x4" },
> +#endif
> + };
> +
> + gstage_mode = HGATP_MODE_OFF;
> +
> + for ( mode_idx = 0; mode_idx < ARRAY_SIZE(modes); mode_idx++ )
> + {
> + unsigned long mode = modes[mode_idx].mode;
> +
> + csr_write(CSR_HGATP, MASK_INSR(mode, HGATP_MODE_MASK));
> +
> + if ( MASK_EXTR(csr_read(CSR_HGATP), HGATP_MODE_MASK) == mode )
> + {
> + gstage_mode = mode;
> + break;
> + }
> + }
> +
> + if ( gstage_mode == HGATP_MODE_OFF )
> + panic("Xen expects that G-stage won't be Bare mode\n");
> +
> + printk("%s: G-stage mode is %s\n", __func__, modes[mode_idx].name);
I don't think the function name matters here at all.
> --- a/xen/arch/riscv/setup.c
> +++ b/xen/arch/riscv/setup.c
> @@ -22,6 +22,7 @@
> #include <asm/early_printk.h>
> #include <asm/fixmap.h>
> #include <asm/intc.h>
> +#include <asm/p2m.h>
> #include <asm/sbi.h>
> #include <asm/setup.h>
> #include <asm/traps.h>
> @@ -148,6 +149,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
>
> console_init_postirq();
>
> + gstage_mode_detect();
I find it odd for something as fine grained as this to be called from top-
level start_xen(). Imo this wants to be a sub-function of whatever does
global paging and/or p2m preparations (or even more generally guest ones).
Jan
On 9/18/25 5:54 PM, Jan Beulich wrote:
> On 17.09.2025 23:55, Oleksii Kurochko wrote:
>> --- /dev/null
>> +++ b/xen/arch/riscv/p2m.c
>> @@ -0,0 +1,91 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +
>> +#include <xen/init.h>
>> +#include <xen/lib.h>
>> +#include <xen/macros.h>
>> +#include <xen/sections.h>
>> +
>> +#include <asm/csr.h>
>> +#include <asm/flushtlb.h>
>> +#include <asm/riscv_encoding.h>
>> +
>> +unsigned long __ro_after_init gstage_mode;
>> +
>> +void __init gstage_mode_detect(void)
>> +{
>> + unsigned int mode_idx;
>> +
>> + const struct {
> static and __initconst.
>
>> + unsigned long mode;
> Here and also for the global var: Why "long", when it's at most 4 bits?
No specific reason now. In the first version of this function they were used
directly to write a value to CSR register which is 'unsigned long'.
Considering that MASK_INSR() and MASK_EXTR() are used, 'char' should be enough
to describe mode.
>
>> + unsigned int paging_levels;
>> + const char *name;
> More efficiently char[8]?
I wanted to be sure that the name will always have correct length. But I agree
that char[8] is more efficient and a length could be checked "manually". I will
use char[8] instead of 'char *'.
>
>> + } modes[] = {
>> + /*
>> + * Based on the RISC-V spec:
>> + * When SXLEN=32, the only other valid setting for MODE is Sv32,
> The use of "other" is lacking some context here.
I will add the following:
Bare mode is always supported, regardless of SXLEN.
>
>> + * a paged virtual-memory scheme described in Section 10.3.
> Section numbers tend to change. Either to disambiguate by also spcifying
> the doc version, or (preferably) you give the section title instead.
I will take that into account in the future. For now, I think that this part
of the comment could be just dropped as here it doesn't matter what is a scheme
of Sv32.
>> --- a/xen/arch/riscv/setup.c
>> +++ b/xen/arch/riscv/setup.c
>> @@ -22,6 +22,7 @@
>> #include <asm/early_printk.h>
>> #include <asm/fixmap.h>
>> #include <asm/intc.h>
>> +#include <asm/p2m.h>
>> #include <asm/sbi.h>
>> #include <asm/setup.h>
>> #include <asm/traps.h>
>> @@ -148,6 +149,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
>>
>> console_init_postirq();
>>
>> + gstage_mode_detect();
> I find it odd for something as fine grained as this to be called from top-
> level start_xen(). Imo this wants to be a sub-function of whatever does
> global paging and/or p2m preparations (or even more generally guest ones).
It makes sense. I will move the call to gstage_mode_detect() into p2m_init()
when the latter is introduced.
Probably, I will move the current patch after p2m_init() is introduced to make
gstage_mode_detect() static function.
Thanks.
~ Oleksii
On 9/24/25 1:31 PM, Oleksii Kurochko wrote: >>> --- a/xen/arch/riscv/setup.c >>> +++ b/xen/arch/riscv/setup.c >>> @@ -22,6 +22,7 @@ >>> #include <asm/early_printk.h> >>> #include <asm/fixmap.h> >>> #include <asm/intc.h> >>> +#include <asm/p2m.h> >>> #include <asm/sbi.h> >>> #include <asm/setup.h> >>> #include <asm/traps.h> >>> @@ -148,6 +149,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id, >>> >>> console_init_postirq(); >>> >>> + gstage_mode_detect(); >> I find it odd for something as fine grained as this to be called from top- >> level start_xen(). Imo this wants to be a sub-function of whatever does >> global paging and/or p2m preparations (or even more generally guest ones). > It makes sense. I will move the call to gstage_mode_detect() into p2m_init() > when the latter is introduced. > Probably, I will move the current patch after p2m_init() is introduced to make > gstage_mode_detect() static function. Maybe putting gstage_mode_detect() into p2m_init() is not a good idea, since it is called during domain creation. I am not sure there is any point in calling gstage_mode_detect() each time. It seems that gstage_mode_detect() should be called once during physical CPU initialization. A sub-function (riscv_hart_mm_init()? probably, riscv should be dropped from the name) could be added in setup.c and then called in start_xen(), but is it really needed a separate sub-function for something that will be called once per initialization of pCPU? ~ Oleksii
On 24.09.2025 17:00, Oleksii Kurochko wrote: > > On 9/24/25 1:31 PM, Oleksii Kurochko wrote: >>>> --- a/xen/arch/riscv/setup.c >>>> +++ b/xen/arch/riscv/setup.c >>>> @@ -22,6 +22,7 @@ >>>> #include <asm/early_printk.h> >>>> #include <asm/fixmap.h> >>>> #include <asm/intc.h> >>>> +#include <asm/p2m.h> >>>> #include <asm/sbi.h> >>>> #include <asm/setup.h> >>>> #include <asm/traps.h> >>>> @@ -148,6 +149,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id, >>>> >>>> console_init_postirq(); >>>> >>>> + gstage_mode_detect(); >>> I find it odd for something as fine grained as this to be called from top- >>> level start_xen(). Imo this wants to be a sub-function of whatever does >>> global paging and/or p2m preparations (or even more generally guest ones). >> It makes sense. I will move the call to gstage_mode_detect() into p2m_init() >> when the latter is introduced. >> Probably, I will move the current patch after p2m_init() is introduced to make >> gstage_mode_detect() static function. > > Maybe putting gstage_mode_detect() into p2m_init() is not a good idea, since it > is called during domain creation. I am not sure there is any point in calling > gstage_mode_detect() each time. > > It seems that gstage_mode_detect() should be called once during physical CPU > initialization. Indeed. > A sub-function (riscv_hart_mm_init()? probably, riscv should be dropped from > the name) could be added in setup.c and then called in start_xen(), but > is it really needed a separate sub-function for something that will be called > once per initialization of pCPU? Counter question: Is this going to remain the only piece of global init that's needed for P2M machinery? Right in the next patch you already add vmid_init() as another top-level call. Jan
On 9/25/25 3:46 PM, Jan Beulich wrote: > On 24.09.2025 17:00, Oleksii Kurochko wrote: >> On 9/24/25 1:31 PM, Oleksii Kurochko wrote: >>>>> --- a/xen/arch/riscv/setup.c >>>>> +++ b/xen/arch/riscv/setup.c >>>>> @@ -22,6 +22,7 @@ >>>>> #include <asm/early_printk.h> >>>>> #include <asm/fixmap.h> >>>>> #include <asm/intc.h> >>>>> +#include <asm/p2m.h> >>>>> #include <asm/sbi.h> >>>>> #include <asm/setup.h> >>>>> #include <asm/traps.h> >>>>> @@ -148,6 +149,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id, >>>>> >>>>> console_init_postirq(); >>>>> >>>>> + gstage_mode_detect(); >>>> I find it odd for something as fine grained as this to be called from top- >>>> level start_xen(). Imo this wants to be a sub-function of whatever does >>>> global paging and/or p2m preparations (or even more generally guest ones). >>> It makes sense. I will move the call to gstage_mode_detect() into p2m_init() >>> when the latter is introduced. >>> Probably, I will move the current patch after p2m_init() is introduced to make >>> gstage_mode_detect() static function. >> Maybe putting gstage_mode_detect() into p2m_init() is not a good idea, since it >> is called during domain creation. I am not sure there is any point in calling >> gstage_mode_detect() each time. >> >> It seems that gstage_mode_detect() should be called once during physical CPU >> initialization. > Indeed. > >> A sub-function (riscv_hart_mm_init()? probably, riscv should be dropped from >> the name) could be added in setup.c and then called in start_xen(), but >> is it really needed a separate sub-function for something that will be called >> once per initialization of pCPU? > Counter question: Is this going to remain the only piece of global init that's > needed for P2M machinery? Right in the next patch you already add vmid_init() > as another top-level call. No, it isn’t the only piece — at least|gstage_mode_detect()| and|vmid_init()| are also needed for the P2M machinery. Okay, then it would be better to introduce a sub-function now and re-use it later for other pCPUs as well. ~ Oleksii
© 2016 - 2025 Red Hat, Inc.