This mechanism makes it possible to completely bypass the sfence.vma introduced by
the previous commit for uarchs that do not cache invalid TLB entries.
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/mm/init.c | 124 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 124 insertions(+)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 379403de6c6f..2e854613740c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -56,6 +56,8 @@ bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KER
EXPORT_SYMBOL(pgtable_l4_enabled);
EXPORT_SYMBOL(pgtable_l5_enabled);
+bool tlb_caching_invalid_entries;
+
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
@@ -750,6 +752,18 @@ static void __init disable_pgtable_l4(void)
satp_mode = SATP_MODE_39;
}
+static void __init enable_pgtable_l5(void)
+{
+ pgtable_l5_enabled = true;
+ satp_mode = SATP_MODE_57;
+}
+
+static void __init enable_pgtable_l4(void)
+{
+ pgtable_l4_enabled = true;
+ satp_mode = SATP_MODE_48;
+}
+
static int __init print_no4lvl(char *p)
{
pr_info("Disabled 4-level and 5-level paging");
@@ -826,6 +840,112 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
memset(early_pud, 0, PAGE_SIZE);
memset(early_pmd, 0, PAGE_SIZE);
}
+
+/* Determine at runtime if the uarch caches invalid TLB entries */
+static __init void set_tlb_caching_invalid_entries(void)
+{
+#define NR_RETRIES_CACHING_INVALID_ENTRIES 50
+ uintptr_t set_tlb_caching_invalid_entries_pmd = ((unsigned long)set_tlb_caching_invalid_entries) & PMD_MASK;
+ // TODO the test_addr as defined below could go into another pud...
+ uintptr_t test_addr = set_tlb_caching_invalid_entries_pmd + 2 * PMD_SIZE;
+ pmd_t valid_pmd;
+ u64 satp;
+ int i = 0;
+
+ /* To ease the page table creation */
+ disable_pgtable_l5();
+ disable_pgtable_l4();
+
+ /* Establish a mapping for set_tlb_caching_invalid_entries() in sv39 */
+ create_pgd_mapping(early_pg_dir,
+ set_tlb_caching_invalid_entries_pmd,
+ (uintptr_t)early_pmd,
+ PGDIR_SIZE, PAGE_TABLE);
+
+ /* Handle the case where set_tlb_caching_invalid_entries straddles 2 PMDs */
+ create_pmd_mapping(early_pmd,
+ set_tlb_caching_invalid_entries_pmd,
+ set_tlb_caching_invalid_entries_pmd,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+ create_pmd_mapping(early_pmd,
+ set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
+ set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+
+ /* Establish an invalid mapping */
+ create_pmd_mapping(early_pmd, test_addr, 0, PMD_SIZE, __pgprot(0));
+
+ /* Precompute the valid pmd here because the mapping for pfn_pmd() won't exist */
+ valid_pmd = pfn_pmd(PFN_DOWN(set_tlb_caching_invalid_entries_pmd), PAGE_KERNEL);
+
+ local_flush_tlb_all();
+ satp = PFN_DOWN((uintptr_t)&early_pg_dir) | SATP_MODE_39;
+ csr_write(CSR_SATP, satp);
+
+ /*
+ * Set stvec to after the trapping access, access this invalid mapping
+ * and legitimately trap
+ */
+ // TODO: Should I save the previous stvec?
+#define ASM_STR(x) __ASM_STR(x)
+ asm volatile(
+ "la a0, 1f \n"
+ "csrw " ASM_STR(CSR_TVEC) ", a0 \n"
+ "ld a0, 0(%0) \n"
+ ".align 2 \n"
+ "1: \n"
+ :
+ : "r" (test_addr)
+ : "a0"
+ );
+
+ /* Now establish a valid mapping to check if the invalid one is cached */
+ early_pmd[pmd_index(test_addr)] = valid_pmd;
+
+ /*
+ * Access the valid mapping multiple times: indeed, we can't use
+ * sfence.vma as a barrier to make sure the cpu did not reorder accesses
+ * so we may trap even if the uarch does not cache invalid entries. By
+ * trying a few times, we make sure that those uarchs will see the right
+ * mapping at some point.
+ */
+
+ i = NR_RETRIES_CACHING_INVALID_ENTRIES;
+
+#define ASM_STR(x) __ASM_STR(x)
+ asm_volatile_goto(
+ "la a0, 1f \n"
+ "csrw " ASM_STR(CSR_TVEC) ", a0 \n"
+ ".align 2 \n"
+ "1: \n"
+ "addi %0, %0, -1 \n"
+ "blt %0, zero, %l[caching_invalid_entries] \n"
+ "ld a0, 0(%1) \n"
+ :
+ : "r" (i), "r" (test_addr)
+ : "a0"
+ : caching_invalid_entries
+ );
+
+ csr_write(CSR_SATP, 0ULL);
+ local_flush_tlb_all();
+
+ /* If we don't trap, the uarch does not cache invalid entries! */
+ tlb_caching_invalid_entries = false;
+ goto clean;
+
+caching_invalid_entries:
+ csr_write(CSR_SATP, 0ULL);
+ local_flush_tlb_all();
+
+ tlb_caching_invalid_entries = true;
+clean:
+ memset(early_pg_dir, 0, PAGE_SIZE);
+ memset(early_pmd, 0, PAGE_SIZE);
+
+ enable_pgtable_l4();
+ enable_pgtable_l5();
+}
#endif
/*
@@ -1072,6 +1192,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
#endif
#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+ set_tlb_caching_invalid_entries();
set_satp_mode(dtb_pa);
#endif
@@ -1322,6 +1443,9 @@ static void __init setup_vm_final(void)
local_flush_tlb_all();
pt_ops_set_late();
+
+ pr_info("uarch caches invalid entries: %s",
+ tlb_caching_invalid_entries ? "yes" : "no");
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
--
2.39.2
On 07/12/2023 16:03, Alexandre Ghiti wrote:
> This mechanism makes it possible to completely bypass the sfence.vma introduced by
> the previous commit for uarchs that do not cache invalid TLB entries.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/mm/init.c | 124 +++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 124 insertions(+)
>
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 379403de6c6f..2e854613740c 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -56,6 +56,8 @@ bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KER
> EXPORT_SYMBOL(pgtable_l4_enabled);
> EXPORT_SYMBOL(pgtable_l5_enabled);
>
> +bool tlb_caching_invalid_entries;
> +
> phys_addr_t phys_ram_base __ro_after_init;
> EXPORT_SYMBOL(phys_ram_base);
>
> @@ -750,6 +752,18 @@ static void __init disable_pgtable_l4(void)
> satp_mode = SATP_MODE_39;
> }
>
> +static void __init enable_pgtable_l5(void)
> +{
> + pgtable_l5_enabled = true;
> + satp_mode = SATP_MODE_57;
> +}
> +
> +static void __init enable_pgtable_l4(void)
> +{
> + pgtable_l4_enabled = true;
> + satp_mode = SATP_MODE_48;
> +}
> +
> static int __init print_no4lvl(char *p)
> {
> pr_info("Disabled 4-level and 5-level paging");
> @@ -826,6 +840,112 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
> memset(early_pud, 0, PAGE_SIZE);
> memset(early_pmd, 0, PAGE_SIZE);
> }
> +
> +/* Determine at runtime if the uarch caches invalid TLB entries */
> +static __init void set_tlb_caching_invalid_entries(void)
> +{
> +#define NR_RETRIES_CACHING_INVALID_ENTRIES 50
Looks odd to have macros nested in the middle of a function.
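Something like this at file scope, next to the other early boot helpers, would read better (untested sketch; if I'm not mistaken __ASM_STR() is already provided by <asm/asm.h>, so the ASM_STR() wrapper could even be dropped and __ASM_STR(CSR_TVEC) used directly in the asm strings):

	/* Max number of probe attempts through the valid mapping */
	#define NR_RETRIES_CACHING_INVALID_ENTRIES	50
	#define ASM_STR(x)				__ASM_STR(x)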
> + uintptr_t set_tlb_caching_invalid_entries_pmd = ((unsigned long)set_tlb_caching_invalid_entries) & PMD_MASK;
> + // TODO the test_addr as defined below could go into another pud...
> + uintptr_t test_addr = set_tlb_caching_invalid_entries_pmd + 2 * PMD_SIZE;
> + pmd_t valid_pmd;
> + u64 satp;
> + int i = 0;
> +
> + /* To ease the page table creation */
> + disable_pgtable_l5();
> + disable_pgtable_l4();
> +
> + /* Establish a mapping for set_tlb_caching_invalid_entries() in sv39 */
> + create_pgd_mapping(early_pg_dir,
> + set_tlb_caching_invalid_entries_pmd,
> + (uintptr_t)early_pmd,
> + PGDIR_SIZE, PAGE_TABLE);
> +
> + /* Handle the case where set_tlb_caching_invalid_entries straddles 2 PMDs */
> + create_pmd_mapping(early_pmd,
> + set_tlb_caching_invalid_entries_pmd,
> + set_tlb_caching_invalid_entries_pmd,
> + PMD_SIZE, PAGE_KERNEL_EXEC);
> + create_pmd_mapping(early_pmd,
> + set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
> + set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
> + PMD_SIZE, PAGE_KERNEL_EXEC);
> +
> + /* Establish an invalid mapping */
> + create_pmd_mapping(early_pmd, test_addr, 0, PMD_SIZE, __pgprot(0));
> +
> + /* Precompute the valid pmd here because the mapping for pfn_pmd() won't exist */
> + valid_pmd = pfn_pmd(PFN_DOWN(set_tlb_caching_invalid_entries_pmd), PAGE_KERNEL);
> +
> + local_flush_tlb_all();
> + satp = PFN_DOWN((uintptr_t)&early_pg_dir) | SATP_MODE_39;
> + csr_write(CSR_SATP, satp);
> +
> + /*
> + * Set stvec to after the trapping access, access this invalid mapping
> + * and legitimately trap
> + */
> + // TODO: Should I save the previous stvec?
> +#define ASM_STR(x) __ASM_STR(x)
Looks odd to have macros nested in the middle of a function.
> + asm volatile(
> + "la a0, 1f \n"
> + "csrw " ASM_STR(CSR_TVEC) ", a0 \n"
> + "ld a0, 0(%0) \n"
> + ".align 2 \n"
> + "1: \n"
> + :
> + : "r" (test_addr)
> + : "a0"
> + );
> +
> + /* Now establish a valid mapping to check if the invalid one is cached */
> + early_pmd[pmd_index(test_addr)] = valid_pmd;
> +
> + /*
> + * Access the valid mapping multiple times: indeed, we can't use
> + * sfence.vma as a barrier to make sure the cpu did not reorder accesses
> + * so we may trap even if the uarch does not cache invalid entries. By
> + * trying a few times, we make sure that those uarchs will see the right
> + * mapping at some point.
> + */
> +
> + i = NR_RETRIES_CACHING_INVALID_ENTRIES;
> +
> +#define ASM_STR(x) __ASM_STR(x)
Duplicate define?
> + asm_volatile_goto(
> + "la a0, 1f \n"
> + "csrw " ASM_STR(CSR_TVEC) ", a0 \n"
> + ".align 2 \n"
> + "1: \n"
> + "addi %0, %0, -1 \n"
> + "blt %0, zero, %l[caching_invalid_entries] \n"
> + "ld a0, 0(%1) \n"
> + :
> + : "r" (i), "r" (test_addr)
> + : "a0"
> + : caching_invalid_entries
> + );
> +
> + csr_write(CSR_SATP, 0ULL);
> + local_flush_tlb_all();
> +
> + /* If we don't trap, the uarch does not cache invalid entries! */
> + tlb_caching_invalid_entries = false;
> + goto clean;
> +
> +caching_invalid_entries:
> + csr_write(CSR_SATP, 0ULL);
> + local_flush_tlb_all();
> +
> + tlb_caching_invalid_entries = true;
> +clean:
> + memset(early_pg_dir, 0, PAGE_SIZE);
> + memset(early_pmd, 0, PAGE_SIZE);
Use clear_page() instead?
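Something like this (untested):

	clear_page(early_pg_dir);
	clear_page(early_pmd);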
> +
> + enable_pgtable_l4();
> + enable_pgtable_l5();
> +}
> #endif
>
> /*
> @@ -1072,6 +1192,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> #endif
>
> #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
> + set_tlb_caching_invalid_entries();
> set_satp_mode(dtb_pa);
> #endif
>
> @@ -1322,6 +1443,9 @@ static void __init setup_vm_final(void)
> local_flush_tlb_all();
>
> pt_ops_set_late();
> +
> + pr_info("uarch caches invalid entries: %s",
> + tlb_caching_invalid_entries ? "yes" : "no");
> }
> #else
> asmlinkage void __init setup_vm(uintptr_t dtb_pa)
On Thu, Dec 7, 2023 at 4:55 PM Christophe Leroy
<christophe.leroy@csgroup.eu> wrote:
> [...]
I left this patch in so that people can easily test it without knowing
what their uarch actually does, but it will very likely be dropped, as a
new extension has just been proposed for that.
Thanks anyway, I should have been clearer in the patch title,
Alex