Set up fixmap mappings and the L0 page table for fixmap support.
Define new macros in riscv/config.h for calculating
the FIXMAP_BASE address, including BOOT_FDT_VIRT_{START, SIZE},
XEN_VIRT_SIZE, and XEN_VIRT_END.
Update the check for Xen size in riscv/lds.S to use
XEN_VIRT_SIZE instead of a hardcoded constant.
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V4:
- move definitions of XEN_VIRT_SIZE, BOOT_FDT_VIRT_{START,SIZE}, FIXMAP_{BASE,ADDR}
below XEN_VIRT_START to have definitions appear in order.
- define FIX_LAST as (FIX_MISC + 1) to have a guard slot at the end.
- s/enumerated/numbered in the comment
- update the cycle which looks for L1 page table in setup_fixmap_mapping_function() and
the comment above him.
- drop fences inside write_pte() and put 'fence r,r' in setup_fixmap() before sfence_vma().
- update the commit message
- drop printk message inside setup_fixmap().
---
Changes in V3:
- s/XEN_SIZE/XEN_VIRT_SIZE
- drop usage of XEN_VIRT_END.
- sort newly introduced defines in config.h by address
- code style fixes
- drop runtime check of that pte is valid as it was checked in L1 page table finding cycle by BUG_ON().
- update implementation of write_pte() with FENCE rw, rw.
- add BUILD_BUG_ON() to check that amount of entries aren't bigger then entries in page table.
- drop set_fixmap, clear_fixmap declarations as they aren't used and defined now
- update the commit message.
- s/__ASM_FIXMAP_H/ASM_FIXMAP_H
- add SPDX-License-Identifier: GPL-2.0
---
xen/arch/riscv/include/asm/config.h | 8 ++++++
xen/arch/riscv/include/asm/fixmap.h | 44 +++++++++++++++++++++++++++++
xen/arch/riscv/include/asm/mm.h | 2 ++
xen/arch/riscv/include/asm/page.h | 6 ++++
xen/arch/riscv/mm.c | 43 ++++++++++++++++++++++++++++
xen/arch/riscv/setup.c | 2 ++
xen/arch/riscv/xen.lds.S | 2 +-
7 files changed, 106 insertions(+), 1 deletion(-)
create mode 100644 xen/arch/riscv/include/asm/fixmap.h
diff --git a/xen/arch/riscv/include/asm/config.h b/xen/arch/riscv/include/asm/config.h
index 50583aafdc..4b4cc529a9 100644
--- a/xen/arch/riscv/include/asm/config.h
+++ b/xen/arch/riscv/include/asm/config.h
@@ -74,6 +74,14 @@
#error "unsupported RV_STAGE1_MODE"
#endif
+#define XEN_VIRT_SIZE MB(2)
+
+#define BOOT_FDT_VIRT_START (XEN_VIRT_START + XEN_VIRT_SIZE)
+#define BOOT_FDT_VIRT_SIZE MB(4)
+
+#define FIXMAP_BASE (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
+#define FIXMAP_ADDR(n) (FIXMAP_BASE + (n) * PAGE_SIZE)
+
#define DIRECTMAP_SLOT_END 509
#define DIRECTMAP_SLOT_START 200
#define DIRECTMAP_VIRT_START SLOTN(DIRECTMAP_SLOT_START)
diff --git a/xen/arch/riscv/include/asm/fixmap.h b/xen/arch/riscv/include/asm/fixmap.h
new file mode 100644
index 0000000000..2ecd05dd9f
--- /dev/null
+++ b/xen/arch/riscv/include/asm/fixmap.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ */
+#ifndef ASM_FIXMAP_H
+#define ASM_FIXMAP_H
+
+#include <xen/bug.h>
+#include <xen/page-size.h>
+#include <xen/pmap.h>
+
+#include <asm/page.h>
+
+/* Fixmap slots */
+#define FIX_PMAP_BEGIN (0) /* Start of PMAP */
+#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */
+#define FIX_MISC (FIX_PMAP_END + 1) /* Ephemeral mappings of hardware */
+
+#define FIX_LAST (FIX_MISC + 1) /* +1 means a guard slot */
+
+#define FIXADDR_START FIXMAP_ADDR(0)
+#define FIXADDR_TOP FIXMAP_ADDR(FIX_LAST)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Direct access to xen_fixmap[] should only happen when {set,
+ * clear}_fixmap() is unusable (e.g. where we would end up to
+ * recursively call the helpers).
+ */
+extern pte_t xen_fixmap[];
+
+#define fix_to_virt(slot) ((void *)FIXMAP_ADDR(slot))
+
+static inline unsigned int virt_to_fix(vaddr_t vaddr)
+{
+ BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+
+ return ((vaddr - FIXADDR_START) >> PAGE_SHIFT);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* ASM_FIXMAP_H */
diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h
index 25af9e1aaa..a0bdc2bc3a 100644
--- a/xen/arch/riscv/include/asm/mm.h
+++ b/xen/arch/riscv/include/asm/mm.h
@@ -255,4 +255,6 @@ static inline unsigned int arch_get_dma_bitsize(void)
return 32; /* TODO */
}
+void setup_fixmap_mappings(void);
+
#endif /* _ASM_RISCV_MM_H */
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index c831e16417..5db3edb100 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
BUG_ON("unimplemented");
}
+/* Write a pagetable entry. */
+static inline void write_pte(pte_t *p, pte_t pte)
+{
+ *p = pte;
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_PAGE_H */
diff --git a/xen/arch/riscv/mm.c b/xen/arch/riscv/mm.c
index 7d09e781bf..b8ff91cf4e 100644
--- a/xen/arch/riscv/mm.c
+++ b/xen/arch/riscv/mm.c
@@ -12,6 +12,7 @@
#include <asm/early_printk.h>
#include <asm/csr.h>
#include <asm/current.h>
+#include <asm/fixmap.h>
#include <asm/page.h>
#include <asm/processor.h>
@@ -49,6 +50,9 @@ stage1_pgtbl_root[PAGETABLE_ENTRIES];
pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
stage1_pgtbl_nonroot[PGTBL_INITIAL_COUNT * PAGETABLE_ENTRIES];
+pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
+xen_fixmap[PAGETABLE_ENTRIES];
+
#define HANDLE_PGTBL(curr_lvl_num) \
index = pt_index(curr_lvl_num, page_addr); \
if ( pte_is_valid(pgtbl[index]) ) \
@@ -191,6 +195,45 @@ static bool __init check_pgtbl_mode_support(struct mmu_desc *mmu_desc,
return is_mode_supported;
}
+void __init setup_fixmap_mappings(void)
+{
+ pte_t *pte, tmp;
+ unsigned int i;
+
+ BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES);
+
+ pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL, FIXMAP_ADDR(0))];
+
+ /*
+ * In RISC-V page table levels are numbered from Lx to L0 where
+ * x is the highest page table level for currect MMU mode ( for example,
+ * for Sv39 has 3 page tables so the x = 2 (L2 -> L1 -> L0) ).
+ *
+ * In this cycle we want to find L1 page table because as L0 page table
+ * xen_fixmap[] will be used.
+ */
+ for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
+ {
+ BUG_ON(!pte_is_valid(*pte));
+
+ pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
+ pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
+ }
+
+ BUG_ON(pte_is_valid(*pte));
+
+ tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap), PTE_TABLE);
+ write_pte(pte, tmp);
+
+ RISCV_FENCE(rw, rw);
+ sfence_vma();
+
+ /*
+ * We only need the zeroeth table allocated, but not the PTEs set, because
+ * set_fixmap() will set them on the fly.
+ */
+}
+
/*
* setup_initial_pagetables:
*
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 4defad68f4..13f0e8c77d 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -46,6 +46,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
test_macros_from_bug_h();
#endif
+ setup_fixmap_mappings();
+
printk("All set up\n");
for ( ;; )
diff --git a/xen/arch/riscv/xen.lds.S b/xen/arch/riscv/xen.lds.S
index 070b19d915..7a683f6065 100644
--- a/xen/arch/riscv/xen.lds.S
+++ b/xen/arch/riscv/xen.lds.S
@@ -181,6 +181,6 @@ ASSERT(!SIZEOF(.got.plt), ".got.plt non-empty")
* Changing the size of Xen binary can require an update of
* PGTBL_INITIAL_COUNT.
*/
-ASSERT(_end - _start <= MB(2), "Xen too large for early-boot assumptions")
+ASSERT(_end - _start <= XEN_VIRT_SIZE, "Xen too large for early-boot assumptions")
ASSERT(_ident_end - _ident_start <= IDENT_AREA_SIZE, "identity region is too big");
--
2.45.2
On 09.08.2024 18:19, Oleksii Kurochko wrote:
> --- a/xen/arch/riscv/include/asm/config.h
> +++ b/xen/arch/riscv/include/asm/config.h
> @@ -74,6 +74,14 @@
> #error "unsupported RV_STAGE1_MODE"
> #endif
>
> +#define XEN_VIRT_SIZE MB(2)
> +
> +#define BOOT_FDT_VIRT_START (XEN_VIRT_START + XEN_VIRT_SIZE)
> +#define BOOT_FDT_VIRT_SIZE MB(4)
> +
> +#define FIXMAP_BASE (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
> +#define FIXMAP_ADDR(n) (FIXMAP_BASE + (n) * PAGE_SIZE)
Just to confirm: Did you consider leaving gaps between the regions, but
then discarded that idea for whatever reason? It's not like you're tight
on address space.
As to FIXMAP_ADDR() - wouldn't that be a better fit in fixmap.h?
> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/fixmap.h
> @@ -0,0 +1,44 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * fixmap.h: compile-time virtual memory allocation
> + */
> +#ifndef ASM_FIXMAP_H
> +#define ASM_FIXMAP_H
> +
> +#include <xen/bug.h>
> +#include <xen/page-size.h>
> +#include <xen/pmap.h>
> +
> +#include <asm/page.h>
> +
> +/* Fixmap slots */
> +#define FIX_PMAP_BEGIN (0) /* Start of PMAP */
> +#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */
> +#define FIX_MISC (FIX_PMAP_END + 1) /* Ephemeral mappings of hardware */
> +
> +#define FIX_LAST (FIX_MISC + 1) /* +1 means a guard slot */
As per my comment on the earlier version: No, I don't think this arranges
for a guard slot. It merely arranges for FIX_MISC to not trigger the
BUG_ON() in virt_to_fix().
> --- a/xen/arch/riscv/include/asm/page.h
> +++ b/xen/arch/riscv/include/asm/page.h
> @@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
> BUG_ON("unimplemented");
> }
>
> +/* Write a pagetable entry. */
> +static inline void write_pte(pte_t *p, pte_t pte)
> +{
> + *p = pte;
> +}
No use of write_atomic() here? And no read_pte() counterpart right away (then
also properly using read_atomic())?
> @@ -191,6 +195,45 @@ static bool __init check_pgtbl_mode_support(struct mmu_desc *mmu_desc,
> return is_mode_supported;
> }
>
> +void __init setup_fixmap_mappings(void)
> +{
> + pte_t *pte, tmp;
> + unsigned int i;
> +
> + BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES);
> +
> + pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL, FIXMAP_ADDR(0))];
> +
> + /*
> + * In RISC-V page table levels are numbered from Lx to L0 where
> + * x is the highest page table level for currect MMU mode ( for example,
> + * for Sv39 has 3 page tables so the x = 2 (L2 -> L1 -> L0) ).
> + *
> + * In this cycle we want to find L1 page table because as L0 page table
> + * xen_fixmap[] will be used.
> + */
> + for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
> + {
> + BUG_ON(!pte_is_valid(*pte));
> +
> + pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
> + pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
> + }
> +
> + BUG_ON(pte_is_valid(*pte));
> +
> + tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap), PTE_TABLE);
> + write_pte(pte, tmp);
> +
> + RISCV_FENCE(rw, rw);
In the changes section you say "r,r", and I was wondering about that. I
take it that "rw,rw" is really what's needed here.
Jan
On Tue, 2024-08-13 at 10:22 +0200, Jan Beulich wrote:
> On 09.08.2024 18:19, Oleksii Kurochko wrote:
> > --- a/xen/arch/riscv/include/asm/config.h
> > +++ b/xen/arch/riscv/include/asm/config.h
> > @@ -74,6 +74,14 @@
> > #error "unsupported RV_STAGE1_MODE"
> > #endif
> >
> > +#define XEN_VIRT_SIZE MB(2)
> > +
> > +#define BOOT_FDT_VIRT_START (XEN_VIRT_START + XEN_VIRT_SIZE)
> > +#define BOOT_FDT_VIRT_SIZE MB(4)
> > +
> > +#define FIXMAP_BASE (BOOT_FDT_VIRT_START +
> > BOOT_FDT_VIRT_SIZE)
> > +#define FIXMAP_ADDR(n) (FIXMAP_BASE + (n) * PAGE_SIZE)
>
> Just to confirm: Did you consider leaving gaps between the regions,
> but
> then discarded that idea for whatever reason? It's not like you're
> tight
> on address space.
No, I would like to leave gaps between the regions. I have a slot gap
where it is possible, inside a slot I decided just not to add this gap
without any specific reason TBH. I can add some gap ( for example,
0x100000 ) if it would be better and consistent.
>
> As to FIXMAP_ADDR() - wouldn't that be a better fit in fixmap.h?
Sure, it would more correct place for this macros.
>
> > --- /dev/null
> > +++ b/xen/arch/riscv/include/asm/fixmap.h
> > @@ -0,0 +1,44 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * fixmap.h: compile-time virtual memory allocation
> > + */
> > +#ifndef ASM_FIXMAP_H
> > +#define ASM_FIXMAP_H
> > +
> > +#include <xen/bug.h>
> > +#include <xen/page-size.h>
> > +#include <xen/pmap.h>
> > +
> > +#include <asm/page.h>
> > +
> > +/* Fixmap slots */
> > +#define FIX_PMAP_BEGIN (0) /* Start of PMAP */
> > +#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of
> > PMAP */
> > +#define FIX_MISC (FIX_PMAP_END + 1) /* Ephemeral mappings of
> > hardware */
> > +
> > +#define FIX_LAST (FIX_MISC + 1) /* +1 means a guard slot */
>
> As per my comment on the earlier version: No, I don't think this
> arranges
> for a guard slot. It merely arranges for FIX_MISC to not trigger the
> BUG_ON() in virt_to_fix().
>
> > --- a/xen/arch/riscv/include/asm/page.h
> > +++ b/xen/arch/riscv/include/asm/page.h
> > @@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned
> > long mfn, bool sync_icache)
> > BUG_ON("unimplemented");
> > }
> >
> > +/* Write a pagetable entry. */
> > +static inline void write_pte(pte_t *p, pte_t pte)
> > +{
> > + *p = pte;
> > +}
>
> No use of write_atomic() here? And no read_pte() counterpart right
> away (then
> also properly using read_atomic())?
I wanted to avoid the fence before "*p=pte" which in case of
write_atomic() will be present. Won't it be enough to use here
WRITE_ONCE()?
>
> > @@ -191,6 +195,45 @@ static bool __init
> > check_pgtbl_mode_support(struct mmu_desc *mmu_desc,
> > return is_mode_supported;
> > }
> >
> > +void __init setup_fixmap_mappings(void)
> > +{
> > + pte_t *pte, tmp;
> > + unsigned int i;
> > +
> > + BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES);
> > +
> > + pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL,
> > FIXMAP_ADDR(0))];
> > +
> > + /*
> > + * In RISC-V page table levels are numbered from Lx to L0
> > where
> > + * x is the highest page table level for currect MMU mode (
> > for example,
> > + * for Sv39 has 3 page tables so the x = 2 (L2 -> L1 -> L0) ).
> > + *
> > + * In this cycle we want to find L1 page table because as L0
> > page table
> > + * xen_fixmap[] will be used.
> > + */
> > + for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
> > + {
> > + BUG_ON(!pte_is_valid(*pte));
> > +
> > + pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
> > + pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
> > + }
> > +
> > + BUG_ON(pte_is_valid(*pte));
> > +
> > + tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap),
> > PTE_TABLE);
> > + write_pte(pte, tmp);
> > +
> > + RISCV_FENCE(rw, rw);
>
> In the changes section you say "r,r", and I was wondering about that.
> I
> take it that "rw,rw" is really what's needed here.
It was typo. I will update the change log.
Thanks.
~ Oleksii
On 14.08.2024 16:21, oleksii.kurochko@gmail.com wrote:
> On Tue, 2024-08-13 at 10:22 +0200, Jan Beulich wrote:
>> On 09.08.2024 18:19, Oleksii Kurochko wrote:
>>> --- a/xen/arch/riscv/include/asm/page.h
>>> +++ b/xen/arch/riscv/include/asm/page.h
>>> @@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned
>>> long mfn, bool sync_icache)
>>> BUG_ON("unimplemented");
>>> }
>>>
>>> +/* Write a pagetable entry. */
>>> +static inline void write_pte(pte_t *p, pte_t pte)
>>> +{
>>> + *p = pte;
>>> +}
>>
>> No use of write_atomic() here? And no read_pte() counterpart right
>> away (then
>> also properly using read_atomic())?
> I wanted to avoid the fence before "*p=pte" which in case of
> write_atomic() will be present.
Well, this goes back to write_atomic() resolving to write{b,w,l,q}() for
unclear reasons; even the comment in our atomic.h says "For some reason".
The fence there is of course getting in the way here. I keep forgetting
about that aspect, because neither x86 nor Arm has anything similar
afaics.
> Won't it be enough to use here WRITE_ONCE()?
Maybe. I'm not entirely sure.
Jan
On Wed, 2024-08-14 at 17:08 +0200, Jan Beulich wrote:
> On 14.08.2024 16:21, oleksii.kurochko@gmail.com wrote:
> > On Tue, 2024-08-13 at 10:22 +0200, Jan Beulich wrote:
> > > On 09.08.2024 18:19, Oleksii Kurochko wrote:
> > > > --- a/xen/arch/riscv/include/asm/page.h
> > > > +++ b/xen/arch/riscv/include/asm/page.h
> > > > @@ -81,6 +81,12 @@ static inline void
> > > > flush_page_to_ram(unsigned
> > > > long mfn, bool sync_icache)
> > > > BUG_ON("unimplemented");
> > > > }
> > > >
> > > > +/* Write a pagetable entry. */
> > > > +static inline void write_pte(pte_t *p, pte_t pte)
> > > > +{
> > > > + *p = pte;
> > > > +}
> > >
> > > No use of write_atomic() here? And no read_pte() counterpart
> > > right
> > > away (then
> > > also properly using read_atomic())?
> > I wanted to avoid the fence before "*p=pte" which in case of
> > write_atomic() will be present.
>
> Well, this goes back to write_atomic() resolving to write{b,w,l,q}()
> for
> unclear reasons; even the comment in our atomic.h says "For some
> reason".
> The fence there is of course getting in the way here. I keep
> forgetting
> about that aspect, because neither x86 nor Arm has anything similar
> afaics.
Good point. I overlooked something here ( and misinterpreted your
comments regarding that write_atomic() implementation ) but I think it
would be better to use write{b,w,l,q}_cpu() for write_atomic.
~ Oleksii
>
> > Won't it be enough to use here WRITE_ONCE()?
>
> Maybe. I'm not entirely sure.
>
> Jan
© 2016 - 2026 Red Hat, Inc.