arch/riscv/include/asm/page.h | 4 ++++ arch/riscv/include/asm/pgtable.h | 4 +++- arch/riscv/mm/init.c | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-)
In sparse vmemmap model, the virtual address of vmemmap is calculated as:
((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)).
And the struct page's va can be calculated with an offset:
(vmemmap + (pfn)).
However, when initializing struct pages, the kernel actually starts from
the first page of the section that phys_ram_base belongs to. If that
first page's pfn is not (phys_ram_base >> PAGE_SHIFT), then we get a VA
below VMEMMAP_START when calculating the VA of its struct page.
For example, if phys_ram_base starts from 0x82000000 with pfn 0x82000, the
first page in the same section is actually pfn 0x80000. During
init_unavailable_range(), we will initialize struct page for pfn 0x80000
with virtual address ((struct page *)VMEMMAP_START - 0x2000), which is
below VMEMMAP_START as well as PCI_IO_END.
This commit fixes this bug by introducing a new variable
'vmemmap_start_pfn' which is aligned with memory section size and using
it to calculate vmemmap address instead of phys_ram_base.
Fixes: a11dd49dcb93 ("riscv: Sparse-Memory/vmemmap out-of-bounds fix")
Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
arch/riscv/include/asm/page.h | 4 ++++
arch/riscv/include/asm/pgtable.h | 4 +++-
arch/riscv/mm/init.c | 18 ++++++++++++++++++
3 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 71aabc5c6713..a1be1adcfb85 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -123,6 +123,10 @@ struct kernel_mapping {
extern struct kernel_mapping kernel_map;
extern phys_addr_t phys_ram_base;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+extern unsigned long vmemmap_start_pfn;
+#endif
+
#define is_kernel_mapping(x) \
((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index d4e99eef90ac..e2dbd4b9a686 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -87,7 +87,9 @@
* Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
* is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
*/
-#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define vmemmap ((struct page *)VMEMMAP_START - vmemmap_start_pfn)
+#endif
#define PCI_IO_SIZE SZ_16M
#define PCI_IO_END VMEMMAP_START
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 0e8c20adcd98..e7c52d647f50 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -32,6 +32,9 @@
#include <asm/numa.h>
#include <asm/pgtable.h>
#include <asm/sections.h>
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#include <asm/sparsemem.h>
+#endif
#include <asm/soc.h>
#include <asm/tlbflush.h>
@@ -62,6 +65,13 @@ EXPORT_SYMBOL(pgtable_l5_enabled);
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS)
+
+unsigned long vmemmap_start_pfn __ro_after_init;
+EXPORT_SYMBOL(vmemmap_start_pfn);
+#endif
+
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -243,6 +253,11 @@ static void __init setup_bootmem(void)
if (!IS_ENABLED(CONFIG_XIP_KERNEL))
phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ if (!IS_ENABLED(CONFIG_XIP_KERNEL))
+ vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
+#endif
+
/*
* In 64-bit, any use of __va/__pa before this point is wrong as we
* did not know the start of DRAM before.
@@ -1101,6 +1116,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
phys_ram_base = CONFIG_PHYS_RAM_BASE;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
+#endif
kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
--
2.20.1
Minor nits below; too many ifdefs.
Xu Lu <luxu.kernel@bytedance.com> writes:
> In sparse vmemmap model, the virtual address of vmemmap is calculated as:
> ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)).
> And the struct page's va can be calculated with an offset:
> (vmemmap + (pfn)).
>
> However, when initializing struct pages, the kernel actually starts from
> the first page of the section that phys_ram_base belongs to. If that
> first page's pfn is not (phys_ram_base >> PAGE_SHIFT), then we get a VA
> below VMEMMAP_START when calculating the VA of its struct page.
>
> For example, if phys_ram_base starts from 0x82000000 with pfn 0x82000, the
> first page in the same section is actually pfn 0x80000. During
> init_unavailable_range(), we will initialize struct page for pfn 0x80000
> with virtual address ((struct page *)VMEMMAP_START - 0x2000), which is
> below VMEMMAP_START as well as PCI_IO_END.
>
> This commit fixes this bug by introducing a new variable
> 'vmemmap_start_pfn' which is aligned with memory section size and using
> it to calculate vmemmap address instead of phys_ram_base.
>
> Fixes: a11dd49dcb93 ("riscv: Sparse-Memory/vmemmap out-of-bounds fix")
> Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> ---
> arch/riscv/include/asm/page.h | 4 ++++
> arch/riscv/include/asm/pgtable.h | 4 +++-
> arch/riscv/mm/init.c | 18 ++++++++++++++++++
> 3 files changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 71aabc5c6713..a1be1adcfb85 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -123,6 +123,10 @@ struct kernel_mapping {
> extern struct kernel_mapping kernel_map;
> extern phys_addr_t phys_ram_base;
>
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
Not needed, and should be removed.
> +extern unsigned long vmemmap_start_pfn;
> +#endif
> +
> #define is_kernel_mapping(x) \
> ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
>
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index d4e99eef90ac..e2dbd4b9a686 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -87,7 +87,9 @@
> * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
> * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
> */
> -#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
Ditto, please remove.
> +#define vmemmap ((struct page *)VMEMMAP_START - vmemmap_start_pfn)
> +#endif
>
> #define PCI_IO_SIZE SZ_16M
> #define PCI_IO_END VMEMMAP_START
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 0e8c20adcd98..e7c52d647f50 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -32,6 +32,9 @@
> #include <asm/numa.h>
> #include <asm/pgtable.h>
> #include <asm/sections.h>
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
Not needed, please remove.
> +#include <asm/sparsemem.h>
> +#endif
> #include <asm/soc.h>
> #include <asm/tlbflush.h>
>
> @@ -62,6 +65,13 @@ EXPORT_SYMBOL(pgtable_l5_enabled);
> phys_addr_t phys_ram_base __ro_after_init;
> EXPORT_SYMBOL(phys_ram_base);
>
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> +#define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS)
> +
> +unsigned long vmemmap_start_pfn __ro_after_init;
> +EXPORT_SYMBOL(vmemmap_start_pfn);
> +#endif
> +
> unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
> __page_aligned_bss;
> EXPORT_SYMBOL(empty_zero_page);
> @@ -243,6 +253,11 @@ static void __init setup_bootmem(void)
> if (!IS_ENABLED(CONFIG_XIP_KERNEL))
> phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
>
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> + if (!IS_ENABLED(CONFIG_XIP_KERNEL))
> + vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
> +#endif
> +
Please fold this into the if-statement above, e.g.:
| if (!IS_ENABLED(CONFIG_XIP_KERNEL)) {
| phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
| #if defined(CONFIG_SPARSEMEM_VMEMMAP)
| vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
| #endif
| }
Björn
Hi Björn,
Thanks for your comment. Is it OK to introduce the new and unused
variable 'vmemmap_start_pfn' when CONFIG_SPARSEMEM_VMEMMAP is
disabled?
On Mon, Dec 9, 2024 at 7:03 PM Björn Töpel <bjorn@kernel.org> wrote:
>
> Minor nits below; Too many ifdefs.
>
> Xu Lu <luxu.kernel@bytedance.com> writes:
>
> > In sparse vmemmap model, the virtual address of vmemmap is calculated as:
> > ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)).
> > And the struct page's va can be calculated with an offset:
> > (vmemmap + (pfn)).
> >
> > However, when initializing struct pages, the kernel actually starts from
> > the first page of the section that phys_ram_base belongs to. If that
> > first page's pfn is not (phys_ram_base >> PAGE_SHIFT), then we get a VA
> > below VMEMMAP_START when calculating the VA of its struct page.
> >
> > For example, if phys_ram_base starts from 0x82000000 with pfn 0x82000, the
> > first page in the same section is actually pfn 0x80000. During
> > init_unavailable_range(), we will initialize struct page for pfn 0x80000
> > with virtual address ((struct page *)VMEMMAP_START - 0x2000), which is
> > below VMEMMAP_START as well as PCI_IO_END.
> >
> > This commit fixes this bug by introducing a new variable
> > 'vmemmap_start_pfn' which is aligned with memory section size and using
> > it to calculate vmemmap address instead of phys_ram_base.
> >
> > Fixes: a11dd49dcb93 ("riscv: Sparse-Memory/vmemmap out-of-bounds fix")
> > Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> > ---
> > arch/riscv/include/asm/page.h | 4 ++++
> > arch/riscv/include/asm/pgtable.h | 4 +++-
> > arch/riscv/mm/init.c | 18 ++++++++++++++++++
> > 3 files changed, 25 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> > index 71aabc5c6713..a1be1adcfb85 100644
> > --- a/arch/riscv/include/asm/page.h
> > +++ b/arch/riscv/include/asm/page.h
> > @@ -123,6 +123,10 @@ struct kernel_mapping {
> > extern struct kernel_mapping kernel_map;
> > extern phys_addr_t phys_ram_base;
> >
> > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
>
> Not needed, and should be removed.
>
> > +extern unsigned long vmemmap_start_pfn;
> > +#endif
> > +
> > #define is_kernel_mapping(x) \
> > ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
> >
> > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> > index d4e99eef90ac..e2dbd4b9a686 100644
> > --- a/arch/riscv/include/asm/pgtable.h
> > +++ b/arch/riscv/include/asm/pgtable.h
> > @@ -87,7 +87,9 @@
> > * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
> > * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
> > */
> > -#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
> > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
>
> > Ditto, please remove.
>
> > +#define vmemmap ((struct page *)VMEMMAP_START - vmemmap_start_pfn)
> > +#endif
> >
> > #define PCI_IO_SIZE SZ_16M
> > #define PCI_IO_END VMEMMAP_START
> > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > index 0e8c20adcd98..e7c52d647f50 100644
> > --- a/arch/riscv/mm/init.c
> > +++ b/arch/riscv/mm/init.c
> > @@ -32,6 +32,9 @@
> > #include <asm/numa.h>
> > #include <asm/pgtable.h>
> > #include <asm/sections.h>
> > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
>
> Not needed, please remove.
>
> > +#include <asm/sparsemem.h>
> > +#endif
> > #include <asm/soc.h>
> > #include <asm/tlbflush.h>
> >
> > @@ -62,6 +65,13 @@ EXPORT_SYMBOL(pgtable_l5_enabled);
> > phys_addr_t phys_ram_base __ro_after_init;
> > EXPORT_SYMBOL(phys_ram_base);
> >
> > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> > +#define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS)
> > +
> > +unsigned long vmemmap_start_pfn __ro_after_init;
> > +EXPORT_SYMBOL(vmemmap_start_pfn);
> > +#endif
> > +
> > unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
> > __page_aligned_bss;
> > EXPORT_SYMBOL(empty_zero_page);
> > @@ -243,6 +253,11 @@ static void __init setup_bootmem(void)
> > if (!IS_ENABLED(CONFIG_XIP_KERNEL))
> > phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
> >
> > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> > + if (!IS_ENABLED(CONFIG_XIP_KERNEL))
> > + vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
> > +#endif
> > +
>
> Please fold this into the if-statement above, e.g.:
> | if (!IS_ENABLED(CONFIG_XIP_KERNEL)) {
> | phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
> | #if defined(CONFIG_SPARSEMEM_VMEMMAP)
> | vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
> | #endif
> | }
>
>
> Björn
Best Regards,
Xu Lu
Hi,
On Mon, 9 Dec 2024 at 12:25, Xu Lu <luxu.kernel@bytedance.com> wrote:
>
> Hi Björn,
>
> Thanks for your comment. Is it OK to introduce the new and unused
> variable 'vmemmap_start_pfn' when CONFIG_SPARSEMEM_VMEMMAP is
> disabled?
The declaration, yes. Definition, no!
Cheers,
Björn
> On Mon, Dec 9, 2024 at 7:03 PM Björn Töpel <bjorn@kernel.org> wrote:
> >
> > Minor nits below; Too many ifdefs.
> >
> > Xu Lu <luxu.kernel@bytedance.com> writes:
> >
> > > In sparse vmemmap model, the virtual address of vmemmap is calculated as:
> > > ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)).
> > > And the struct page's va can be calculated with an offset:
> > > (vmemmap + (pfn)).
> > >
> > > > However, when initializing struct pages, the kernel actually starts from
> > > > the first page of the section that phys_ram_base belongs to. If that
> > > > first page's pfn is not (phys_ram_base >> PAGE_SHIFT), then we get a VA
> > > > below VMEMMAP_START when calculating the VA of its struct page.
> > >
> > > For example, if phys_ram_base starts from 0x82000000 with pfn 0x82000, the
> > > first page in the same section is actually pfn 0x80000. During
> > > init_unavailable_range(), we will initialize struct page for pfn 0x80000
> > > with virtual address ((struct page *)VMEMMAP_START - 0x2000), which is
> > > below VMEMMAP_START as well as PCI_IO_END.
> > >
> > > This commit fixes this bug by introducing a new variable
> > > 'vmemmap_start_pfn' which is aligned with memory section size and using
> > > it to calculate vmemmap address instead of phys_ram_base.
> > >
> > > Fixes: a11dd49dcb93 ("riscv: Sparse-Memory/vmemmap out-of-bounds fix")
> > > Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> > > ---
> > > arch/riscv/include/asm/page.h | 4 ++++
> > > arch/riscv/include/asm/pgtable.h | 4 +++-
> > > arch/riscv/mm/init.c | 18 ++++++++++++++++++
> > > 3 files changed, 25 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> > > index 71aabc5c6713..a1be1adcfb85 100644
> > > --- a/arch/riscv/include/asm/page.h
> > > +++ b/arch/riscv/include/asm/page.h
> > > @@ -123,6 +123,10 @@ struct kernel_mapping {
> > > extern struct kernel_mapping kernel_map;
> > > extern phys_addr_t phys_ram_base;
> > >
> > > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> >
> > Not needed, and should be removed.
> >
> > > +extern unsigned long vmemmap_start_pfn;
> > > +#endif
> > > +
> > > #define is_kernel_mapping(x) \
> > > ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
> > >
> > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> > > index d4e99eef90ac..e2dbd4b9a686 100644
> > > --- a/arch/riscv/include/asm/pgtable.h
> > > +++ b/arch/riscv/include/asm/pgtable.h
> > > @@ -87,7 +87,9 @@
> > > * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
> > > * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
> > > */
> > > -#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
> > > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> >
> > > Ditto, please remove.
> >
> > > +#define vmemmap ((struct page *)VMEMMAP_START - vmemmap_start_pfn)
> > > +#endif
> > >
> > > #define PCI_IO_SIZE SZ_16M
> > > #define PCI_IO_END VMEMMAP_START
> > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > index 0e8c20adcd98..e7c52d647f50 100644
> > > --- a/arch/riscv/mm/init.c
> > > +++ b/arch/riscv/mm/init.c
> > > @@ -32,6 +32,9 @@
> > > #include <asm/numa.h>
> > > #include <asm/pgtable.h>
> > > #include <asm/sections.h>
> > > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> >
> > Not needed, please remove.
> >
> > > +#include <asm/sparsemem.h>
> > > +#endif
> > > #include <asm/soc.h>
> > > #include <asm/tlbflush.h>
> > >
> > > @@ -62,6 +65,13 @@ EXPORT_SYMBOL(pgtable_l5_enabled);
> > > phys_addr_t phys_ram_base __ro_after_init;
> > > EXPORT_SYMBOL(phys_ram_base);
> > >
> > > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> > > +#define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS)
> > > +
> > > +unsigned long vmemmap_start_pfn __ro_after_init;
> > > +EXPORT_SYMBOL(vmemmap_start_pfn);
> > > +#endif
> > > +
> > > unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
> > > __page_aligned_bss;
> > > EXPORT_SYMBOL(empty_zero_page);
> > > @@ -243,6 +253,11 @@ static void __init setup_bootmem(void)
> > > if (!IS_ENABLED(CONFIG_XIP_KERNEL))
> > > phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
> > >
> > > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> > > + if (!IS_ENABLED(CONFIG_XIP_KERNEL))
> > > + vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
> > > +#endif
> > > +
> >
> > Please fold this into the if-statement above, e.g.:
> > | if (!IS_ENABLED(CONFIG_XIP_KERNEL)) {
> > | phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
> > | #if defined(CONFIG_SPARSEMEM_VMEMMAP)
> > | vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
> > | #endif
> > | }
> >
> >
> > Björn
>
> Best Regards,
>
> Xu Lu
© 2016 - 2025 Red Hat, Inc.