Rework G-stage mode handling to make the selected mode descriptor reusable
outside of p2m initialization.
As max_gstage_mode is going to be reused by code that creates CPU nodes for
guest domains, not only max_gstage_mode->mode but also max_gstage_mode->name
is required. To support this, make max_gstage_mode a global pointer to one of
the entries in a global modes[] array, and remove get_max_supported_mode().
Update struct p2m_domain to store a pointer to a mode descriptor instead of
embedding the structure directly.
Refactor the modes[] array so that mode->name contains only the MMU scheme
name (without the "x4" suffix), as this value is reused when filling the
maximum MMU type passed to the guest. According to DT bindings [1], the MMU
type must not include the "x4" suffix. Use "none" for the Bare mode to match
the DT binding requirements.
Adjust modes[]->paging_levels to represent the maximum paging level rather
than the total number of levels. This ensures that P2M_ROOT_LEVEL() and its
users behave correctly without relying on hardcoded p2m mode values.
Finally, drop __initconst from the modes[] declaration, as the array is
referenced via p2m->mode and max_gstage_mode beyond the init stage.
[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/riscv/cpus.yaml?h=v6.19-rc3#n82
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
xen/arch/riscv/include/asm/p2m.h | 7 ++--
xen/arch/riscv/p2m.c | 60 +++++++++++++-------------------
xen/arch/riscv/vmid.c | 2 +-
3 files changed, 30 insertions(+), 39 deletions(-)
diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h
index c6d846b96fb4..4441c0400b83 100644
--- a/xen/arch/riscv/include/asm/p2m.h
+++ b/xen/arch/riscv/include/asm/p2m.h
@@ -13,7 +13,7 @@
#define P2M_ROOT_ORDER (ilog2(GSTAGE_ROOT_PAGE_TABLE_SIZE) - PAGE_SHIFT)
#define P2M_ROOT_PAGES BIT(P2M_ROOT_ORDER, U)
-#define P2M_ROOT_LEVEL(p2m) ((p2m)->mode.paging_levels)
+#define P2M_ROOT_LEVEL(p2m) ((p2m)->mode->paging_levels)
/*
* According to the RISC-V spec:
@@ -58,6 +58,8 @@ struct gstage_mode_desc {
char name[8];
};
+extern const struct gstage_mode_desc *max_gstage_mode;
+
/* Per-p2m-table state */
struct p2m_domain {
/*
@@ -71,7 +73,7 @@ struct p2m_domain {
/* The root of the p2m tree. May be concatenated */
struct page_info *root;
- struct gstage_mode_desc mode;
+ const struct gstage_mode_desc *mode;
/* Back pointer to domain */
struct domain *domain;
@@ -218,7 +220,6 @@ static inline bool arch_acquire_resource_check(struct domain *d)
}
void guest_mm_init(void);
-unsigned char get_max_supported_mode(void);
int p2m_init(struct domain *d);
diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c
index 886e06196ba2..dce1eb205ec9 100644
--- a/xen/arch/riscv/p2m.c
+++ b/xen/arch/riscv/p2m.c
@@ -45,18 +45,32 @@ struct p2m_pte_ctx {
unsigned int level; /* Paging level at which the PTE resides. */
};
-static struct gstage_mode_desc __ro_after_init max_gstage_mode = {
- .mode = HGATP_MODE_OFF,
- .paging_levels = 0,
- .name = "Bare",
-};
-
/*
* Set to the maximum configured support for IPA bits, so the number of IPA bits can be
* restricted by external entity (e.g. IOMMU).
*/
unsigned int __read_mostly p2m_ipa_bits = PADDR_BITS;
+static const struct gstage_mode_desc modes[] = {
+ /*
+ * Based on the RISC-V spec:
+ * Bare mode is always supported, regardless of SXLEN.
+ * When SXLEN=32, the only other valid setting for MODE is Sv32.
+ * When SXLEN=64, three paged virtual-memory schemes are defined:
+ * Sv39, Sv48, and Sv57.
+ */
+ [0] = { HGATP_MODE_OFF, 0, "none" },
+#ifdef CONFIG_RISCV_32
+ [1] = { HGATP_MODE_SV32X4, 1, "sv32" }
+#else
+ [2] = { HGATP_MODE_SV39X4, 2, "sv39" },
+ [3] = { HGATP_MODE_SV48X4, 3, "sv48" },
+ [4] = { HGATP_MODE_SV57X4, 4, "sv57" },
+#endif
+};
+
+const struct gstage_mode_desc * __ro_after_init max_gstage_mode = &modes[0];
+
static void p2m_free_page(struct p2m_domain *p2m, struct page_info *pg);
static inline void p2m_free_metadata_page(struct p2m_domain *p2m,
@@ -69,11 +83,6 @@ static inline void p2m_free_metadata_page(struct p2m_domain *p2m,
}
}
-unsigned char get_max_supported_mode(void)
-{
- return max_gstage_mode.mode;
-}
-
/*
* If anything is changed here, it may also require updates to
* p2m_{get,set}_type().
@@ -154,23 +163,6 @@ static pte_t *p2m_get_root_pointer(struct p2m_domain *p2m, gfn_t gfn)
static void __init gstage_mode_detect(void)
{
- static const struct gstage_mode_desc modes[] __initconst = {
- /*
- * Based on the RISC-V spec:
- * Bare mode is always supported, regardless of SXLEN.
- * When SXLEN=32, the only other valid setting for MODE is Sv32.
- * When SXLEN=64, three paged virtual-memory schemes are defined:
- * Sv39, Sv48, and Sv57.
- */
-#ifdef CONFIG_RISCV_32
- { HGATP_MODE_SV32X4, 2, "Sv32x4" }
-#else
- { HGATP_MODE_SV39X4, 3, "Sv39x4" },
- { HGATP_MODE_SV48X4, 4, "Sv48x4" },
- { HGATP_MODE_SV57X4, 5, "Sv57x4" },
-#endif
- };
-
for ( unsigned int mode_idx = ARRAY_SIZE(modes); mode_idx-- > 0; )
{
unsigned long mode = modes[mode_idx].mode;
@@ -179,16 +171,16 @@ static void __init gstage_mode_detect(void)
if ( MASK_EXTR(csr_read(CSR_HGATP), HGATP_MODE_MASK) == mode )
{
- max_gstage_mode = modes[mode_idx];
+ max_gstage_mode = &modes[mode_idx];
break;
}
}
- if ( max_gstage_mode.mode == HGATP_MODE_OFF )
+ if ( max_gstage_mode->mode == HGATP_MODE_OFF )
panic("Xen expects that G-stage won't be Bare mode\n");
- printk("Max supported G-stage mode is %s\n", max_gstage_mode.name);
+ printk("Max supported G-stage mode is %sx4\n", max_gstage_mode->name);
csr_write(CSR_HGATP, 0);
@@ -289,7 +281,7 @@ static void clear_and_clean_page(struct page_info *page, bool clean_dcache)
unsigned long construct_hgatp(const struct p2m_domain *p2m, uint16_t vmid)
{
return MASK_INSR(mfn_x(page_to_mfn(p2m->root)), HGATP_PPN_MASK) |
- MASK_INSR(p2m->mode.mode, HGATP_MODE_MASK) |
+ MASK_INSR(p2m->mode->mode, HGATP_MODE_MASK) |
MASK_INSR(vmid, HGATP_VMID_MASK);
}
@@ -369,9 +361,7 @@ int p2m_init(struct domain *d)
#endif
/* TODO: don't hardcode used for a domain g-stage mode. */
- p2m->mode.mode = HGATP_MODE_SV39X4;
- p2m->mode.paging_levels = 2;
- safe_strcpy(p2m->mode.name, "Sv39x4");
+ p2m->mode = &modes[2];
return 0;
}
diff --git a/xen/arch/riscv/vmid.c b/xen/arch/riscv/vmid.c
index 8fbcd500f24d..11c7e9d6d6c8 100644
--- a/xen/arch/riscv/vmid.c
+++ b/xen/arch/riscv/vmid.c
@@ -52,7 +52,7 @@ static DEFINE_PER_CPU(struct vmid_data, vmid_data);
static unsigned int vmidlen_detect(void)
{
unsigned int vmid_bits;
- unsigned char gstage_mode = get_max_supported_mode();
+ unsigned char gstage_mode = max_gstage_mode->mode;
/*
* According to the RISC-V Privileged Architecture Spec:
--
2.53.0
On 10.03.2026 18:08, Oleksii Kurochko wrote:
> Rework G-stage mode handling to make the selected mode descriptor reusable
> outside of p2m initialization.
>
> As max_gstage_mode is going to be reused by code that creates CPU nodes for
> guest domains, not only max_gstage_mode->mode but also max_gstage_mode->name
> is required.
I guess I'm not DT-savvy enough to understand why that would be.
> To support this, make max_gstage_mode a global pointer to one of
> the entries in a global modes[] array, and remove get_max_supported_mode().
>
> Update struct p2m_domain to store a pointer to a mode descriptor instead of
> embedding the structure directly.
>
> Refactor the modes[] array so that mode->name contains only the MMU scheme
> name (without the "x4" suffix), as this value is reused when filling the
> maximum MMU type passed to the guest. According to DT bindings [1], the MMU
> type must not include the "x4" suffix. Use "none" for the Bare mode to match
> the DT binding requirements.
I expect this DT aspect is also why Sv changes to sv in the table? (Which
is a little unhelpful for the printk() where it's used.)
> Adjust modes[]->paging_levels to represent the maximum paging level rather
> than the total number of levels. This ensures that P2M_ROOT_LEVEL() and its
> users behave correctly without relying on hardcoded p2m mode values.
>
> Finally, drop __initconst from the modes[] declaration, as the array is
> referenced via p2m->mode and max_gstage_mode beyond the init stage.
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/riscv/cpus.yaml?h=v6.19-rc3#n82
Is a reference into Linux doc really providing something "canonical"? Surely
there's an independent spec somewhere?
> --- a/xen/arch/riscv/p2m.c
> +++ b/xen/arch/riscv/p2m.c
> @@ -45,18 +45,32 @@ struct p2m_pte_ctx {
> unsigned int level; /* Paging level at which the PTE resides. */
> };
>
> -static struct gstage_mode_desc __ro_after_init max_gstage_mode = {
> - .mode = HGATP_MODE_OFF,
> - .paging_levels = 0,
> - .name = "Bare",
> -};
> -
> /*
> * Set to the maximum configured support for IPA bits, so the number of IPA bits can be
> * restricted by external entity (e.g. IOMMU).
> */
> unsigned int __read_mostly p2m_ipa_bits = PADDR_BITS;
>
> +static const struct gstage_mode_desc modes[] = {
As a function scope static this was a fine identifier. Please consider whether
with the wider scope gstage_modes[] might not be better.
> + /*
> + * Based on the RISC-V spec:
> + * Bare mode is always supported, regardless of SXLEN.
> + * When SXLEN=32, the only other valid setting for MODE is Sv32.
> + * When SXLEN=64, three paged virtual-memory schemes are defined:
> + * Sv39, Sv48, and Sv57.
> + */
> + [0] = { HGATP_MODE_OFF, 0, "none" },
> +#ifdef CONFIG_RISCV_32
> + [1] = { HGATP_MODE_SV32X4, 1, "sv32" }
> +#else
> + [2] = { HGATP_MODE_SV39X4, 2, "sv39" },
> + [3] = { HGATP_MODE_SV48X4, 3, "sv48" },
> + [4] = { HGATP_MODE_SV57X4, 4, "sv57" },
> +#endif
> +};
The dedicated initializer form isn't adding any value here (whereas it slightly
hampers readability). You really don't want the array to be sparsely populated,
so perhaps better to leave as it was before?
Jan
On 4/1/26 3:19 PM, Jan Beulich wrote:
> On 10.03.2026 18:08, Oleksii Kurochko wrote:
>> Rework G-stage mode handling to make the selected mode descriptor reusable
>> outside of p2m initialization.
>>
>> As max_gstage_mode is going to be reused by code that creates CPU nodes for
>> guest domains, not only max_gstage_mode->mode but also max_gstage_mode->name
>> is required.
>
> I guess I'm not DT-savvy enough to understand why that would be.
There is an optional mmu-type property for each cpu:
https://github.com/riscv-non-isa/riscv-device-tree-doc/blob/master/bindings/riscv/cpus.txt#L73
>
>> To support this, make max_gstage_mode a global pointer to one of
>> the entries in a global modes[] array, and remove get_max_supported_mode().
>>
>> Update struct p2m_domain to store a pointer to a mode descriptor instead of
>> embedding the structure directly.
>>
>> Refactor the modes[] array so that mode->name contains only the MMU scheme
>> name (without the "x4" suffix), as this value is reused when filling the
>> maximum MMU type passed to the guest. According to DT bindings [1], the MMU
>> type must not include the "x4" suffix. Use "none" for the Bare mode to match
>> the DT binding requirements.
>
> I expect this DT aspect is also why Sv changes to sv in the table? (Which
> is a little unhelpful for the printk() where it's used.)
Yes. According to the link above the following options could be passed:
"riscv,sv32"
"riscv,sv39"
"riscv,sv48"
>
>> Adjust modes[]->paging_levels to represent the maximum paging level rather
>> than the total number of levels. This ensures that P2M_ROOT_LEVEL() and its
>> users behave correctly without relying on hardcoded p2m mode values.
>>
>> Finally, drop __initconst from the modes[] declaration, as the array is
>> referenced via p2m->mode and max_gstage_mode beyond the init stage.
>>
>> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/riscv/cpus.yaml?h=v6.19-rc3#n82
>
> Is a reference into Linux doc really providing something "canonical"? Surely
> there's an independent spec somewhere?
I wasn't able to find better source for arch-specific definitions. For
example, the source mentioned above has outdated mmu-type properties
which should also contain riscv,sv57 and riscv,none.
>
>> --- a/xen/arch/riscv/p2m.c
>> +++ b/xen/arch/riscv/p2m.c
>> @@ -45,18 +45,32 @@ struct p2m_pte_ctx {
>> unsigned int level; /* Paging level at which the PTE resides. */
>> };
>>
>> -static struct gstage_mode_desc __ro_after_init max_gstage_mode = {
>> - .mode = HGATP_MODE_OFF,
>> - .paging_levels = 0,
>> - .name = "Bare",
>> -};
>> -
>> /*
>> * Set to the maximum configured support for IPA bits, so the number of IPA bits can be
>> * restricted by external entity (e.g. IOMMU).
>> */
>> unsigned int __read_mostly p2m_ipa_bits = PADDR_BITS;
>>
>> +static const struct gstage_mode_desc modes[] = {
>
> As a function scope static this was a fine identifier. Please consider whether
> with the wider scope gstage_modes[] might not be better.
>
>> + /*
>> + * Based on the RISC-V spec:
>> + * Bare mode is always supported, regardless of SXLEN.
>> + * When SXLEN=32, the only other valid setting for MODE is Sv32.
>> + * When SXLEN=64, three paged virtual-memory schemes are defined:
>> + * Sv39, Sv48, and Sv57.
>> + */
>> + [0] = { HGATP_MODE_OFF, 0, "none" },
>> +#ifdef CONFIG_RISCV_32
>> + [1] = { HGATP_MODE_SV32X4, 1, "sv32" }
>> +#else
>> + [2] = { HGATP_MODE_SV39X4, 2, "sv39" },
>> + [3] = { HGATP_MODE_SV48X4, 3, "sv48" },
>> + [4] = { HGATP_MODE_SV57X4, 4, "sv57" },
>> +#endif
>> +};
>
> The dedicated initializer form isn't adding any value here (whereas it slightly
> hampers readability). You really don't want the array to be sparsely populated,
> so perhaps better to leave as it was before?
I need modes[] to be outside of gstage_mode_detect() as it then could be
re-used. For example, if expected G-stage mode should be passed by DTS
property then in DTS property we'll have something like:
chosen {
...
DOMU1 {
mmu-type="riscv,sv48";
...
}
...
}
And I will need to have another functions something like:
static unsigned int find_gstage_mode(const char *mmu_type) {...}
which will re-use modes[] to find a correspondent mode and return an
index (or return just correspondent mode) for that mode to then re-use
it to initialize p2m->mode:
p2m->mode = &modes[find_gstage_mode(mmu_type)];
~ Oleksii
On 07.04.2026 12:47, Oleksii Kurochko wrote:
> On 4/1/26 3:19 PM, Jan Beulich wrote:
>> On 10.03.2026 18:08, Oleksii Kurochko wrote:
>>> --- a/xen/arch/riscv/p2m.c
>>> +++ b/xen/arch/riscv/p2m.c
>>> @@ -45,18 +45,32 @@ struct p2m_pte_ctx {
>>> unsigned int level; /* Paging level at which the PTE resides. */
>>> };
>>>
>>> -static struct gstage_mode_desc __ro_after_init max_gstage_mode = {
>>> - .mode = HGATP_MODE_OFF,
>>> - .paging_levels = 0,
>>> - .name = "Bare",
>>> -};
>>> -
>>> /*
>>> * Set to the maximum configured support for IPA bits, so the number of IPA bits can be
>>> * restricted by external entity (e.g. IOMMU).
>>> */
>>> unsigned int __read_mostly p2m_ipa_bits = PADDR_BITS;
>>>
>>> +static const struct gstage_mode_desc modes[] = {
>>
>> As a function scope static this was a fine identifier. Please consider whether
>> with the wider scope gstage_modes[] might not be better.
>>
>>> + /*
>>> + * Based on the RISC-V spec:
>>> + * Bare mode is always supported, regardless of SXLEN.
>>> + * When SXLEN=32, the only other valid setting for MODE is Sv32.
>>> + * When SXLEN=64, three paged virtual-memory schemes are defined:
>>> + * Sv39, Sv48, and Sv57.
>>> + */
>>> + [0] = { HGATP_MODE_OFF, 0, "none" },
>>> +#ifdef CONFIG_RISCV_32
>>> + [1] = { HGATP_MODE_SV32X4, 1, "sv32" }
>>> +#else
>>> + [2] = { HGATP_MODE_SV39X4, 2, "sv39" },
>>> + [3] = { HGATP_MODE_SV48X4, 3, "sv48" },
>>> + [4] = { HGATP_MODE_SV57X4, 4, "sv57" },
>>> +#endif
>>> +};
>>
>> The dedicated initializer form isn't adding any value here (whereas it slightly
>> hampers readability). You really don't want the array to be sparsely populated,
>> so perhaps better to leave as it was before?
>
> I need modes[] to be outside of gstage_mode_detect() as it then could be
> re-used.
Sure, and I didn't say "where it was before". I said "as it was before", i.e.
without dedicated initializers.
Jan
> For example, if expected G-stage mode should be passed by DTS
> property then in DTS property we'll have something like:
> chosen {
> ...
> DOMU1 {
> mmu-type="riscv,sv48";
> ...
> }
> ...
> }
>
> And I will need to have another functions something like:
> static unsigned int find_gstage_mode(const char *mmu_type) {...}
> which will re-use modes[] to find a correspondent mode and return an
> index (or return just correspondent mode) for that mode to then re-use
> it to initialize p2m->mode:
> p2m->mode = &modes[find_gstage_mode(mmu_type)];
>
> ~ Oleksii
© 2016 - 2026 Red Hat, Inc.