Set up fixmap mappings and the L0 page table for fixmap support.
Define new macros in riscv/config.h for calculating
the FIXMAP_BASE address, including BOOT_FDT_VIRT_{START, SIZE},
XEN_VIRT_SIZE, FIXMAP_BASE and FIXMAP_ADDR().
Update the check for Xen size in riscv/xen.lds.S to use
XEN_VIRT_SIZE instead of a hardcoded constant.
Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V4:
- move definitions of XEN_VIRT_SIZE, BOOT_FDT_VIRT_{START,SIZE}, FIXMAP_{BASE,ADDR}
below XEN_VIRT_START to have definitions appear in order.
- define FIX_LAST as (FIX_MISC + 1) to have a guard slot at the end.
- s/enumerated/numbered in the comment
- update the loop which looks for the L1 page table in setup_fixmap_mappings() and
  the comment above it.
- drop fences inside write_pte() and put 'fence rw,rw' in setup_fixmap_mappings() before sfence_vma().
- update the commit message
- drop printk message inside setup_fixmap_mappings().
---
Changes in V3:
- s/XEN_SIZE/XEN_VIRT_SIZE
- drop usage of XEN_VIRT_END.
- sort newly introduced defines in config.h by address
- code style fixes
- drop the runtime check that the pte is valid, as it is already checked by BUG_ON() in the loop that finds the L1 page table.
- update implementation of write_pte() with FENCE rw, rw.
- add BUILD_BUG_ON() to check that the number of fixmap entries isn't bigger than the number of entries in a page table.
- drop set_fixmap, clear_fixmap declarations as they aren't used and defined now
- update the commit message.
- s/__ASM_FIXMAP_H/ASM_FIXMAP_H
- add SPDX-License-Identifier: GPL-2.0
---
xen/arch/riscv/include/asm/config.h | 8 ++++++
xen/arch/riscv/include/asm/fixmap.h | 44 +++++++++++++++++++++++++++++
xen/arch/riscv/include/asm/mm.h | 2 ++
xen/arch/riscv/include/asm/page.h | 6 ++++
xen/arch/riscv/mm.c | 43 ++++++++++++++++++++++++++++
xen/arch/riscv/setup.c | 2 ++
xen/arch/riscv/xen.lds.S | 2 +-
7 files changed, 106 insertions(+), 1 deletion(-)
create mode 100644 xen/arch/riscv/include/asm/fixmap.h
diff --git a/xen/arch/riscv/include/asm/config.h b/xen/arch/riscv/include/asm/config.h
index 50583aafdc..4b4cc529a9 100644
--- a/xen/arch/riscv/include/asm/config.h
+++ b/xen/arch/riscv/include/asm/config.h
@@ -74,6 +74,14 @@
#error "unsupported RV_STAGE1_MODE"
#endif
+#define XEN_VIRT_SIZE MB(2)
+
+#define BOOT_FDT_VIRT_START (XEN_VIRT_START + XEN_VIRT_SIZE)
+#define BOOT_FDT_VIRT_SIZE MB(4)
+
+#define FIXMAP_BASE (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
+#define FIXMAP_ADDR(n) (FIXMAP_BASE + (n) * PAGE_SIZE)
+
#define DIRECTMAP_SLOT_END 509
#define DIRECTMAP_SLOT_START 200
#define DIRECTMAP_VIRT_START SLOTN(DIRECTMAP_SLOT_START)
diff --git a/xen/arch/riscv/include/asm/fixmap.h b/xen/arch/riscv/include/asm/fixmap.h
new file mode 100644
index 0000000000..2ecd05dd9f
--- /dev/null
+++ b/xen/arch/riscv/include/asm/fixmap.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ */
+#ifndef ASM_FIXMAP_H
+#define ASM_FIXMAP_H
+
+#include <xen/bug.h>
+#include <xen/page-size.h>
+#include <xen/pmap.h>
+
+#include <asm/page.h>
+
+/* Fixmap slots */
+#define FIX_PMAP_BEGIN (0) /* Start of PMAP */
+#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */
+#define FIX_MISC (FIX_PMAP_END + 1) /* Ephemeral mappings of hardware */
+
+#define FIX_LAST (FIX_MISC + 1) /* +1 means a guard slot */
+
+#define FIXADDR_START FIXMAP_ADDR(0)
+#define FIXADDR_TOP FIXMAP_ADDR(FIX_LAST)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Direct access to xen_fixmap[] should only happen when {set,
+ * clear}_fixmap() is unusable (e.g. where we would end up to
+ * recursively call the helpers).
+ */
+extern pte_t xen_fixmap[];
+
+#define fix_to_virt(slot) ((void *)FIXMAP_ADDR(slot))
+
+static inline unsigned int virt_to_fix(vaddr_t vaddr)
+{
+ BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+
+ return ((vaddr - FIXADDR_START) >> PAGE_SHIFT);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* ASM_FIXMAP_H */
diff --git a/xen/arch/riscv/include/asm/mm.h b/xen/arch/riscv/include/asm/mm.h
index 25af9e1aaa..a0bdc2bc3a 100644
--- a/xen/arch/riscv/include/asm/mm.h
+++ b/xen/arch/riscv/include/asm/mm.h
@@ -255,4 +255,6 @@ static inline unsigned int arch_get_dma_bitsize(void)
return 32; /* TODO */
}
+void setup_fixmap_mappings(void);
+
#endif /* _ASM_RISCV_MM_H */
diff --git a/xen/arch/riscv/include/asm/page.h b/xen/arch/riscv/include/asm/page.h
index c831e16417..5db3edb100 100644
--- a/xen/arch/riscv/include/asm/page.h
+++ b/xen/arch/riscv/include/asm/page.h
@@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache)
BUG_ON("unimplemented");
}
+/* Write a pagetable entry. */
+static inline void write_pte(pte_t *p, pte_t pte)
+{
+ *p = pte;
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_PAGE_H */
diff --git a/xen/arch/riscv/mm.c b/xen/arch/riscv/mm.c
index 7d09e781bf..b8ff91cf4e 100644
--- a/xen/arch/riscv/mm.c
+++ b/xen/arch/riscv/mm.c
@@ -12,6 +12,7 @@
#include <asm/early_printk.h>
#include <asm/csr.h>
#include <asm/current.h>
+#include <asm/fixmap.h>
#include <asm/page.h>
#include <asm/processor.h>
@@ -49,6 +50,9 @@ stage1_pgtbl_root[PAGETABLE_ENTRIES];
pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
stage1_pgtbl_nonroot[PGTBL_INITIAL_COUNT * PAGETABLE_ENTRIES];
+pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
+xen_fixmap[PAGETABLE_ENTRIES];
+
#define HANDLE_PGTBL(curr_lvl_num) \
index = pt_index(curr_lvl_num, page_addr); \
if ( pte_is_valid(pgtbl[index]) ) \
@@ -191,6 +195,45 @@ static bool __init check_pgtbl_mode_support(struct mmu_desc *mmu_desc,
return is_mode_supported;
}
+void __init setup_fixmap_mappings(void)
+{
+ pte_t *pte, tmp;
+ unsigned int i;
+
+ BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES);
+
+ pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL, FIXMAP_ADDR(0))];
+
+ /*
+ * In RISC-V, page table levels are numbered from Lx down to L0, where
+ * x is the highest page table level for the current MMU mode (for
+ * example, Sv39 has 3 page table levels, so x = 2 (L2 -> L1 -> L0)).
+ *
+ * In this loop we want to find the L1 page table, because xen_fixmap[]
+ * will be used as the L0 page table.
+ */
+ for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; )
+ {
+ BUG_ON(!pte_is_valid(*pte));
+
+ pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte));
+ pte = &pte[pt_index(i, FIXMAP_ADDR(0))];
+ }
+
+ BUG_ON(pte_is_valid(*pte));
+
+ tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap), PTE_TABLE);
+ write_pte(pte, tmp);
+
+ RISCV_FENCE(rw, rw);
+ sfence_vma();
+
+ /*
+ * We only need the zeroeth table allocated, but not the PTEs set, because
+ * set_fixmap() will set them on the fly.
+ */
+}
+
/*
* setup_initial_pagetables:
*
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 4defad68f4..13f0e8c77d 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -46,6 +46,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
test_macros_from_bug_h();
#endif
+ setup_fixmap_mappings();
+
printk("All set up\n");
for ( ;; )
diff --git a/xen/arch/riscv/xen.lds.S b/xen/arch/riscv/xen.lds.S
index 070b19d915..7a683f6065 100644
--- a/xen/arch/riscv/xen.lds.S
+++ b/xen/arch/riscv/xen.lds.S
@@ -181,6 +181,6 @@ ASSERT(!SIZEOF(.got.plt), ".got.plt non-empty")
* Changing the size of Xen binary can require an update of
* PGTBL_INITIAL_COUNT.
*/
-ASSERT(_end - _start <= MB(2), "Xen too large for early-boot assumptions")
+ASSERT(_end - _start <= XEN_VIRT_SIZE, "Xen too large for early-boot assumptions")
ASSERT(_ident_end - _ident_start <= IDENT_AREA_SIZE, "identity region is too big");
--
2.45.2
On 09.08.2024 18:19, Oleksii Kurochko wrote: > --- a/xen/arch/riscv/include/asm/config.h > +++ b/xen/arch/riscv/include/asm/config.h > @@ -74,6 +74,14 @@ > #error "unsupported RV_STAGE1_MODE" > #endif > > +#define XEN_VIRT_SIZE MB(2) > + > +#define BOOT_FDT_VIRT_START (XEN_VIRT_START + XEN_VIRT_SIZE) > +#define BOOT_FDT_VIRT_SIZE MB(4) > + > +#define FIXMAP_BASE (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE) > +#define FIXMAP_ADDR(n) (FIXMAP_BASE + (n) * PAGE_SIZE) Just to confirm: Did you consider leaving gaps between the regions, but then discarded that idea for whatever reason? It's not like you're tight on address space. As to FIXMAP_ADDR() - wouldn't that be a better fit in fixmap.h? > --- /dev/null > +++ b/xen/arch/riscv/include/asm/fixmap.h > @@ -0,0 +1,44 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * fixmap.h: compile-time virtual memory allocation > + */ > +#ifndef ASM_FIXMAP_H > +#define ASM_FIXMAP_H > + > +#include <xen/bug.h> > +#include <xen/page-size.h> > +#include <xen/pmap.h> > + > +#include <asm/page.h> > + > +/* Fixmap slots */ > +#define FIX_PMAP_BEGIN (0) /* Start of PMAP */ > +#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of PMAP */ > +#define FIX_MISC (FIX_PMAP_END + 1) /* Ephemeral mappings of hardware */ > + > +#define FIX_LAST (FIX_MISC + 1) /* +1 means a guard slot */ As per my comment on the earlier version: No, I don't think this arranges for a guard slot. It merely arranges for FIX_MISC to not trigger the BUG_ON() in virt_to_fix(). > --- a/xen/arch/riscv/include/asm/page.h > +++ b/xen/arch/riscv/include/asm/page.h > @@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned long mfn, bool sync_icache) > BUG_ON("unimplemented"); > } > > +/* Write a pagetable entry. */ > +static inline void write_pte(pte_t *p, pte_t pte) > +{ > + *p = pte; > +} No use of write_atomic() here? And no read_pte() counterpart right away (then also properly using read_atomic())? 
> @@ -191,6 +195,45 @@ static bool __init check_pgtbl_mode_support(struct mmu_desc *mmu_desc, > return is_mode_supported; > } > > +void __init setup_fixmap_mappings(void) > +{ > + pte_t *pte, tmp; > + unsigned int i; > + > + BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES); > + > + pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL, FIXMAP_ADDR(0))]; > + > + /* > + * In RISC-V page table levels are numbered from Lx to L0 where > + * x is the highest page table level for currect MMU mode ( for example, > + * for Sv39 has 3 page tables so the x = 2 (L2 -> L1 -> L0) ). > + * > + * In this cycle we want to find L1 page table because as L0 page table > + * xen_fixmap[] will be used. > + */ > + for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; ) > + { > + BUG_ON(!pte_is_valid(*pte)); > + > + pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte)); > + pte = &pte[pt_index(i, FIXMAP_ADDR(0))]; > + } > + > + BUG_ON(pte_is_valid(*pte)); > + > + tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap), PTE_TABLE); > + write_pte(pte, tmp); > + > + RISCV_FENCE(rw, rw); In the changes section you say "r,r", and I was wondering about that. I take it that "rw,rw" is really what's needed here. Jan
On Tue, 2024-08-13 at 10:22 +0200, Jan Beulich wrote: > On 09.08.2024 18:19, Oleksii Kurochko wrote: > > --- a/xen/arch/riscv/include/asm/config.h > > +++ b/xen/arch/riscv/include/asm/config.h > > @@ -74,6 +74,14 @@ > > #error "unsupported RV_STAGE1_MODE" > > #endif > > > > +#define XEN_VIRT_SIZE MB(2) > > + > > +#define BOOT_FDT_VIRT_START (XEN_VIRT_START + XEN_VIRT_SIZE) > > +#define BOOT_FDT_VIRT_SIZE MB(4) > > + > > +#define FIXMAP_BASE (BOOT_FDT_VIRT_START + > > BOOT_FDT_VIRT_SIZE) > > +#define FIXMAP_ADDR(n) (FIXMAP_BASE + (n) * PAGE_SIZE) > > Just to confirm: Did you consider leaving gaps between the regions, > but > then discarded that idea for whatever reason? It's not like you're > tight > on address space. No, I would like to leave gaps between the regions. I have a slot gap where it is possible, inside a slot I decided just not to add this gap without any specific reason TBH. I can add some gap ( for example, 0x100000 ) if it would be better and consistent. > > As to FIXMAP_ADDR() - wouldn't that be a better fit in fixmap.h? Sure, it would more correct place for this macros. > > > --- /dev/null > > +++ b/xen/arch/riscv/include/asm/fixmap.h > > @@ -0,0 +1,44 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +/* > > + * fixmap.h: compile-time virtual memory allocation > > + */ > > +#ifndef ASM_FIXMAP_H > > +#define ASM_FIXMAP_H > > + > > +#include <xen/bug.h> > > +#include <xen/page-size.h> > > +#include <xen/pmap.h> > > + > > +#include <asm/page.h> > > + > > +/* Fixmap slots */ > > +#define FIX_PMAP_BEGIN (0) /* Start of PMAP */ > > +#define FIX_PMAP_END (FIX_PMAP_BEGIN + NUM_FIX_PMAP - 1) /* End of > > PMAP */ > > +#define FIX_MISC (FIX_PMAP_END + 1) /* Ephemeral mappings of > > hardware */ > > + > > +#define FIX_LAST (FIX_MISC + 1) /* +1 means a guard slot */ > > As per my comment on the earlier version: No, I don't think this > arranges > for a guard slot. It merely arranges for FIX_MISC to not trigger the > BUG_ON() in virt_to_fix(). 
> > > --- a/xen/arch/riscv/include/asm/page.h > > +++ b/xen/arch/riscv/include/asm/page.h > > @@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned > > long mfn, bool sync_icache) > > BUG_ON("unimplemented"); > > } > > > > +/* Write a pagetable entry. */ > > +static inline void write_pte(pte_t *p, pte_t pte) > > +{ > > + *p = pte; > > +} > > No use of write_atomic() here? And no read_pte() counterpart right > away (then > also properly using read_atomic())? I wanted to avoid the fence before "*p=pte" which in case of write_atomic() will be present. Won't it be enough to use here WRITE_ONCE()? > > > @@ -191,6 +195,45 @@ static bool __init > > check_pgtbl_mode_support(struct mmu_desc *mmu_desc, > > return is_mode_supported; > > } > > > > +void __init setup_fixmap_mappings(void) > > +{ > > + pte_t *pte, tmp; > > + unsigned int i; > > + > > + BUILD_BUG_ON(FIX_LAST >= PAGETABLE_ENTRIES); > > + > > + pte = &stage1_pgtbl_root[pt_index(HYP_PT_ROOT_LEVEL, > > FIXMAP_ADDR(0))]; > > + > > + /* > > + * In RISC-V page table levels are numbered from Lx to L0 > > where > > + * x is the highest page table level for currect MMU mode ( > > for example, > > + * for Sv39 has 3 page tables so the x = 2 (L2 -> L1 -> L0) ). > > + * > > + * In this cycle we want to find L1 page table because as L0 > > page table > > + * xen_fixmap[] will be used. > > + */ > > + for ( i = HYP_PT_ROOT_LEVEL; i-- > 1; ) > > + { > > + BUG_ON(!pte_is_valid(*pte)); > > + > > + pte = (pte_t *)LOAD_TO_LINK(pte_to_paddr(*pte)); > > + pte = &pte[pt_index(i, FIXMAP_ADDR(0))]; > > + } > > + > > + BUG_ON(pte_is_valid(*pte)); > > + > > + tmp = paddr_to_pte(LINK_TO_LOAD((unsigned long)&xen_fixmap), > > PTE_TABLE); > > + write_pte(pte, tmp); > > + > > + RISCV_FENCE(rw, rw); > > In the changes section you say "r,r", and I was wondering about that. > I > take it that "rw,rw" is really what's needed here. It was typo. I will update the change log. Thanks. ~ Oleksii
On 14.08.2024 16:21, oleksii.kurochko@gmail.com wrote: > On Tue, 2024-08-13 at 10:22 +0200, Jan Beulich wrote: >> On 09.08.2024 18:19, Oleksii Kurochko wrote: >>> --- a/xen/arch/riscv/include/asm/page.h >>> +++ b/xen/arch/riscv/include/asm/page.h >>> @@ -81,6 +81,12 @@ static inline void flush_page_to_ram(unsigned >>> long mfn, bool sync_icache) >>> BUG_ON("unimplemented"); >>> } >>> >>> +/* Write a pagetable entry. */ >>> +static inline void write_pte(pte_t *p, pte_t pte) >>> +{ >>> + *p = pte; >>> +} >> >> No use of write_atomic() here? And no read_pte() counterpart right >> away (then >> also properly using read_atomic())? > I wanted to avoid the fence before "*p=pte" which in case of > write_atomic() will be present. Well, this goes back to write_atomic() resolving to write{b,w,l,q}() for unclear reasons; even the comment in our atomic.h says "For some reason". The fence there is of course getting in the way here. I keep forgetting about that aspect, because neither x86 nor Arm has anything similar afaics. > Won't it be enough to use here WRITE_ONCE()? Maybe. I'm not entirely sure. Jan
On Wed, 2024-08-14 at 17:08 +0200, Jan Beulich wrote: > On 14.08.2024 16:21, oleksii.kurochko@gmail.com wrote: > > On Tue, 2024-08-13 at 10:22 +0200, Jan Beulich wrote: > > > On 09.08.2024 18:19, Oleksii Kurochko wrote: > > > > --- a/xen/arch/riscv/include/asm/page.h > > > > +++ b/xen/arch/riscv/include/asm/page.h > > > > @@ -81,6 +81,12 @@ static inline void > > > > flush_page_to_ram(unsigned > > > > long mfn, bool sync_icache) > > > > BUG_ON("unimplemented"); > > > > } > > > > > > > > +/* Write a pagetable entry. */ > > > > +static inline void write_pte(pte_t *p, pte_t pte) > > > > +{ > > > > + *p = pte; > > > > +} > > > > > > No use of write_atomic() here? And no read_pte() counterpart > > > right > > > away (then > > > also properly using read_atomic())? > > I wanted to avoid the fence before "*p=pte" which in case of > > write_atomic() will be present. > > Well, this goes back to write_atomic() resolving to write{b,w,l,q}() > for > unclear reasons; even the comment in our atomic.h says "For some > reason". > The fence there is of course getting in the way here. I keep > forgetting > about that aspect, because neither x86 nor Arm has anything similar > afaics. Good point. I overlooked something here ( and misinterpreted your comments regarding that write_atomic() implementation ) but I think it would be better to use write{b,w,l,q}_cpu() for write_atomic. ~ Oleksii > > > Won't it be enough to use here WRITE_ONCE()? > > Maybe. I'm not entirely sure. > > Jan
© 2016 - 2024 Red Hat, Inc.