[for 4.22 v5 01/18] xen/riscv: detect and initialize G-stage mode

Oleksii Kurochko posted 18 patches 1 week, 2 days ago
[for 4.22 v5 01/18] xen/riscv: detect and initialize G-stage mode
Posted by Oleksii Kurochko 1 week, 2 days ago
Introduce gstage_mode_detect() and pre_gstage_init() to probe supported
G-stage paging modes at boot. The function iterates over possible
HGATP modes (Sv32x4 on RV32, Sv39x4/Sv48x4/Sv57x4 on RV64) and selects
the first valid one by programming CSR_HGATP and reading it back.

The selected mode is stored in gstage_mode (marked __ro_after_init)
and reported via printk. If no supported mode is found, Xen panics
since Bare mode is not expected to be used.

Finally, CSR_HGATP is cleared and a local_hfence_gvma_all() is issued
to avoid any potential speculative pollution of the TLB, as required
by the RISC-V spec.

The following issue starts to occur:
 ./<riscv>/asm/flushtlb.h:37:55: error: 'struct page_info'  declared inside
   parameter list will not be visible outside of this definition or
   declaration [-Werror]
 37 | static inline void page_set_tlbflush_timestamp(struct page_info *page)
To resolve it, forward declaration of struct page_info is added to
<asm/flushtlb.h.

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V5:
 - Add static and __initconst for local variable modes[] in
   gstage_mode_detect().
 - Change type for gstage_mode from 'unsigned long' to 'unsigned char'.
 - Update the comment inisde defintion if modes[] variable in
   gstage_mode_detect():
   - Add information about Bare mode.
   - Drop "a paged virtual-memory scheme described in Section 10.3" as it isn't
     relevant here.
 - Drop printing of function name when chosen G-stage mode message is printed.
 - Drop the call of gstage_mode_detect() from start_xen(). It will be added into
   p2m_init() when the latter will be introduced.
 - Introduce pre_gstage_init().
 - make gstage_mode_detect() static.
---
Changes in V4:
 - New patch.
---
 xen/arch/riscv/Makefile                     |  1 +
 xen/arch/riscv/include/asm/flushtlb.h       |  7 ++
 xen/arch/riscv/include/asm/p2m.h            |  4 +
 xen/arch/riscv/include/asm/riscv_encoding.h |  5 ++
 xen/arch/riscv/p2m.c                        | 96 +++++++++++++++++++++
 xen/arch/riscv/setup.c                      |  3 +
 6 files changed, 116 insertions(+)
 create mode 100644 xen/arch/riscv/p2m.c

diff --git a/xen/arch/riscv/Makefile b/xen/arch/riscv/Makefile
index e2b8aa42c8..264e265699 100644
--- a/xen/arch/riscv/Makefile
+++ b/xen/arch/riscv/Makefile
@@ -7,6 +7,7 @@ obj-y += intc.o
 obj-y += irq.o
 obj-y += mm.o
 obj-y += pt.o
+obj-y += p2m.o
 obj-$(CONFIG_RISCV_64) += riscv64/
 obj-y += sbi.o
 obj-y += setup.o
diff --git a/xen/arch/riscv/include/asm/flushtlb.h b/xen/arch/riscv/include/asm/flushtlb.h
index 51c8f753c5..e70badae0c 100644
--- a/xen/arch/riscv/include/asm/flushtlb.h
+++ b/xen/arch/riscv/include/asm/flushtlb.h
@@ -7,6 +7,13 @@
 
 #include <asm/sbi.h>
 
+struct page_info;
+
+static inline void local_hfence_gvma_all(void)
+{
+    asm volatile ( "hfence.gvma zero, zero" ::: "memory" );
+}
+
 /* Flush TLB of local processor for address va. */
 static inline void flush_tlb_one_local(vaddr_t va)
 {
diff --git a/xen/arch/riscv/include/asm/p2m.h b/xen/arch/riscv/include/asm/p2m.h
index e43c559e0c..3a5066f360 100644
--- a/xen/arch/riscv/include/asm/p2m.h
+++ b/xen/arch/riscv/include/asm/p2m.h
@@ -6,6 +6,8 @@
 
 #include <asm/page-bits.h>
 
+extern unsigned char gstage_mode;
+
 #define paddr_bits PADDR_BITS
 
 /*
@@ -88,6 +90,8 @@ static inline bool arch_acquire_resource_check(struct domain *d)
     return false;
 }
 
+void pre_gstage_init(void);
+
 #endif /* ASM__RISCV__P2M_H */
 
 /*
diff --git a/xen/arch/riscv/include/asm/riscv_encoding.h b/xen/arch/riscv/include/asm/riscv_encoding.h
index 6cc8f4eb45..b15f5ad0b4 100644
--- a/xen/arch/riscv/include/asm/riscv_encoding.h
+++ b/xen/arch/riscv/include/asm/riscv_encoding.h
@@ -131,13 +131,16 @@
 #define HGATP_MODE_SV32X4		_UL(1)
 #define HGATP_MODE_SV39X4		_UL(8)
 #define HGATP_MODE_SV48X4		_UL(9)
+#define HGATP_MODE_SV57X4		_UL(10)
 
 #define HGATP32_MODE_SHIFT		31
+#define HGATP32_MODE_MASK		_UL(0x80000000)
 #define HGATP32_VMID_SHIFT		22
 #define HGATP32_VMID_MASK		_UL(0x1FC00000)
 #define HGATP32_PPN			_UL(0x003FFFFF)
 
 #define HGATP64_MODE_SHIFT		60
+#define HGATP64_MODE_MASK		_ULL(0xF000000000000000)
 #define HGATP64_VMID_SHIFT		44
 #define HGATP64_VMID_MASK		_ULL(0x03FFF00000000000)
 #define HGATP64_PPN			_ULL(0x00000FFFFFFFFFFF)
@@ -170,6 +173,7 @@
 #define HGATP_VMID_SHIFT		HGATP64_VMID_SHIFT
 #define HGATP_VMID_MASK			HGATP64_VMID_MASK
 #define HGATP_MODE_SHIFT		HGATP64_MODE_SHIFT
+#define HGATP_MODE_MASK			HGATP64_MODE_MASK
 #else
 #define MSTATUS_SD			MSTATUS32_SD
 #define SSTATUS_SD			SSTATUS32_SD
@@ -181,6 +185,7 @@
 #define HGATP_VMID_SHIFT		HGATP32_VMID_SHIFT
 #define HGATP_VMID_MASK			HGATP32_VMID_MASK
 #define HGATP_MODE_SHIFT		HGATP32_MODE_SHIFT
+#define HGATP_MODE_MASK			HGATP32_MODE_MASK
 #endif
 
 #define TOPI_IID_SHIFT			16
diff --git a/xen/arch/riscv/p2m.c b/xen/arch/riscv/p2m.c
new file mode 100644
index 0000000000..00fe676089
--- /dev/null
+++ b/xen/arch/riscv/p2m.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/macros.h>
+#include <xen/sections.h>
+
+#include <asm/csr.h>
+#include <asm/flushtlb.h>
+#include <asm/riscv_encoding.h>
+
+unsigned char __ro_after_init gstage_mode;
+
+static void __init gstage_mode_detect(void)
+{
+    static const struct {
+        unsigned char mode;
+        unsigned int paging_levels;
+        const char name[8];
+    } modes[] __initconst = {
+        /*
+         * Based on the RISC-V spec:
+         *   Bare mode is always supported, regardless of SXLEN.
+         *   When SXLEN=32, the only other valid setting for MODE is Sv32.
+         *   When SXLEN=64, three paged virtual-memory schemes are defined:
+         *   Sv39, Sv48, and Sv57.
+         */
+#ifdef CONFIG_RISCV_32
+        { HGATP_MODE_SV32X4, 2, "Sv32x4" }
+#else
+        { HGATP_MODE_SV39X4, 3, "Sv39x4" },
+        { HGATP_MODE_SV48X4, 4, "Sv48x4" },
+        { HGATP_MODE_SV57X4, 5, "Sv57x4" },
+#endif
+    };
+
+    unsigned int mode_idx;
+
+    gstage_mode = HGATP_MODE_OFF;
+
+    for ( mode_idx = 0; mode_idx < ARRAY_SIZE(modes); mode_idx++ )
+    {
+        unsigned long mode = modes[mode_idx].mode;
+
+        csr_write(CSR_HGATP, MASK_INSR(mode, HGATP_MODE_MASK));
+
+        if ( MASK_EXTR(csr_read(CSR_HGATP), HGATP_MODE_MASK) == mode )
+        {
+            gstage_mode = mode;
+            break;
+        }
+    }
+
+    if ( gstage_mode == HGATP_MODE_OFF )
+        panic("Xen expects that G-stage won't be Bare mode\n");
+
+    printk("G-stage mode is %s\n", modes[mode_idx].name);
+
+    csr_write(CSR_HGATP, 0);
+
+    /*
+     * From RISC-V spec:
+     *   Speculative executions of the address-translation algorithm behave as
+     *   non-speculative executions of the algorithm do, except that they must
+     *   not set the dirty bit for a PTE, they must not trigger an exception,
+     *   and they must not create address-translation cache entries if those
+     *   entries would have been invalidated by any SFENCE.VMA instruction
+     *   executed by the hart since the speculative execution of the algorithm
+     *   began.
+     * The quote above mention explicitly SFENCE.VMA, but I assume it is true
+     * for HFENCE.VMA.
+     *
+     * Also, despite of the fact here it is mentioned that when V=0 two-stage
+     * address translation is inactivated:
+     *   The current virtualization mode, denoted V, indicates whether the hart
+     *   is currently executing in a guest. When V=1, the hart is either in
+     *   virtual S-mode (VS-mode), or in virtual U-mode (VU-mode) atop a guest
+     *   OS running in VS-mode. When V=0, the hart is either in M-mode, in
+     *   HS-mode, or in U-mode atop an OS running in HS-mode. The
+     *   virtualization mode also indicates whether two-stage address
+     *   translation is active (V=1) or inactive (V=0).
+     * But on the same side, writing to hgatp register activates it:
+     *   The hgatp register is considered active for the purposes of
+     *   the address-translation algorithm unless the effective privilege mode
+     *   is U and hstatus.HU=0.
+     *
+     * Thereby it leaves some room for speculation even in this stage of boot,
+     * so it could be that we polluted local TLB so flush all guest TLB.
+     */
+    local_hfence_gvma_all();
+}
+
+void __init pre_gstage_init(void)
+{
+    gstage_mode_detect();
+}
diff --git a/xen/arch/riscv/setup.c b/xen/arch/riscv/setup.c
index 483cdd7e17..c4f7793151 100644
--- a/xen/arch/riscv/setup.c
+++ b/xen/arch/riscv/setup.c
@@ -22,6 +22,7 @@
 #include <asm/early_printk.h>
 #include <asm/fixmap.h>
 #include <asm/intc.h>
+#include <asm/p2m.h>
 #include <asm/sbi.h>
 #include <asm/setup.h>
 #include <asm/traps.h>
@@ -148,6 +149,8 @@ void __init noreturn start_xen(unsigned long bootcpu_id,
 
     console_init_postirq();
 
+    pre_gstage_init();
+
     printk("All set up\n");
 
     machine_halt();
-- 
2.51.0