arm64 uses apply_to_page_range to change permissions for kernel VA mappings,
which does not support changing permissions for leaf mappings. This function
will change permissions until it encounters a leaf mapping, and will bail
out. To avoid this partial change, explicitly disallow changing permissions
for VM_ALLOW_HUGE_VMAP mappings.
Signed-off-by: Dev Jain <dev.jain@arm.com>
---
arch/arm64/mm/pageattr.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 39fd1f7ff02a..8337c88eec69 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -96,7 +96,7 @@ static int change_memory_common(unsigned long addr, int numpages,
* we are operating on does not result in such splitting.
*
* Let's restrict ourselves to mappings created by vmalloc (or vmap).
- * Those are guaranteed to consist entirely of page mappings, and
+ * Disallow VM_ALLOW_HUGE_VMAP vmalloc mappings so that
* splitting is never needed.
*
* So check whether the [addr, addr + size) interval is entirely
@@ -105,7 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages,
area = find_vm_area((void *)addr);
if (!area ||
end > (unsigned long)kasan_reset_tag(area->addr) + area->size ||
- !(area->flags & VM_ALLOC))
+ ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC))
return -EINVAL;
if (!numpages)
--
2.30.2
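
To see concretely what the new check rejects, here is a minimal stand-alone
sketch of the predicate (user-space C; the flag values are illustrative, not
the kernel's definitions):

#include <stdio.h>

#define VM_ALLOC           0x2UL    /* illustrative values only */
#define VM_ALLOW_HUGE_VMAP 0x400UL

/* Old check: any area with VM_ALLOC set passes, huge or not. */
static int old_rejects(unsigned long flags)
{
        return !(flags & VM_ALLOC);
}

/* New check: an area that also carries VM_ALLOW_HUGE_VMAP is rejected. */
static int new_rejects(unsigned long flags)
{
        return (flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC;
}

int main(void)
{
        unsigned long plain = VM_ALLOC;
        unsigned long huge = VM_ALLOC | VM_ALLOW_HUGE_VMAP;

        printf("plain: old=%d new=%d\n", old_rejects(plain), new_rejects(plain)); /* 0 0 */
        printf("huge:  old=%d new=%d\n", old_rejects(huge), new_rejects(huge));   /* 0 1 */
        return 0;
}

A plain vmalloc area passes both forms of the check; an area allocated with
VM_ALLOW_HUGE_VMAP now fails it, so change_memory_common() returns -EINVAL
before touching any page table entries.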
On 3/27/25 11:21 PM, Dev Jain wrote:
> arm64 uses apply_to_page_range to change permissions for kernel VA mappings,
> which does not support changing permissions for leaf mappings. This function
> will change permissions until it encounters a leaf mapping, and will bail
> out. To avoid this partial change, explicitly disallow changing permissions
> for VM_ALLOW_HUGE_VMAP mappings.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
> arch/arm64/mm/pageattr.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
> index 39fd1f7ff02a..8337c88eec69 100644
> --- a/arch/arm64/mm/pageattr.c
> +++ b/arch/arm64/mm/pageattr.c
> @@ -96,7 +96,7 @@ static int change_memory_common(unsigned long addr, int numpages,
> * we are operating on does not result in such splitting.
> *
> * Let's restrict ourselves to mappings created by vmalloc (or vmap).
> - * Those are guaranteed to consist entirely of page mappings, and
> + * Disallow VM_ALLOW_HUGE_VMAP vmalloc mappings so that
> * splitting is never needed.
> *
> * So check whether the [addr, addr + size) interval is entirely
> @@ -105,7 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages,
> area = find_vm_area((void *)addr);
> if (!area ||
> end > (unsigned long)kasan_reset_tag(area->addr) + area->size ||
> - !(area->flags & VM_ALLOC))
> + ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC))
> return -EINVAL;
I happened to find this patch when I was looking into fixing the "splitting
is never needed" comment to reflect the latest change with BBML2_NOABORT,
and tried to relax this restriction. I agree with the justification for
this patch to make the code more robust for permission updates on a partial
range. But the following linear mapping permission update code still seems
to assume that a partial range update never happens:
for (i = 0; i < area->nr_pages; i++) {
It iterates over all pages of this vm area, starting from the first page,
and updates their permissions. So I think we should do the below to make it
more robust against partial range updates, like this patch did:
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -185,8 +185,9 @@ static int change_memory_common(unsigned long addr, int numpages,
          */
         if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
                             pgprot_val(clear_mask) == PTE_RDONLY)) {
-                for (i = 0; i < area->nr_pages; i++) {
-                        __change_memory_common((u64)page_address(area->pages[i]),
+                unsigned long idx = (start - (unsigned long)area->addr) >> PAGE_SHIFT;
+                for (i = 0; i < numpages; i++) {
+                        __change_memory_common((u64)page_address(area->pages[idx++]),
                                                PAGE_SIZE, set_mask, clear_mask);
                 }
         }
Just build tested. Does it look reasonable?
Thanks,
Yang
>
> if (!numpages)
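
The index arithmetic in the proposed fix can be sanity-checked in isolation.
A toy model (user-space C; addresses made up, assumes a 64-bit unsigned long
and 4K pages):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)

int main(void)
{
        /* vm area of 8 pages; caller asks to update 2 pages starting at page 3 */
        unsigned long area_addr = 0xffff800010000000UL;
        unsigned long start = area_addr + 3 * PAGE_SIZE;
        unsigned long numpages = 2;

        /* index of the first affected page within area->pages[] */
        unsigned long idx = (start - area_addr) >> PAGE_SHIFT;

        for (unsigned long i = 0; i < numpages; i++, idx++)
                printf("would update area->pages[%lu]\n", idx); /* prints 3, then 4 */

        return 0;
}

The existing loop walks indices 0 through area->nr_pages - 1, i.e. the whole
area, no matter which [start, start + numpages * PAGE_SIZE) window the caller
asked for; the proposed loop touches exactly the requested pages.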
Hi Yang,
On 09/10/2025 21:26, Yang Shi wrote:
>
>
> On 3/27/25 11:21 PM, Dev Jain wrote:
>> arm64 uses apply_to_page_range to change permissions for kernel VA mappings,
>> which does not support changing permissions for leaf mappings. This function
>> will change permissions until it encounters a leaf mapping, and will bail
>> out. To avoid this partial change, explicitly disallow changing permissions
>> for VM_ALLOW_HUGE_VMAP mappings.
>>
>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>> ---
>> arch/arm64/mm/pageattr.c | 4 ++--
>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
>> index 39fd1f7ff02a..8337c88eec69 100644
>> --- a/arch/arm64/mm/pageattr.c
>> +++ b/arch/arm64/mm/pageattr.c
>> @@ -96,7 +96,7 @@ static int change_memory_common(unsigned long addr, int numpages,
>> * we are operating on does not result in such splitting.
>> *
>> * Let's restrict ourselves to mappings created by vmalloc (or vmap).
>> - * Those are guaranteed to consist entirely of page mappings, and
>> + * Disallow VM_ALLOW_HUGE_VMAP vmalloc mappings so that
>> * splitting is never needed.
>> *
>> * So check whether the [addr, addr + size) interval is entirely
>> @@ -105,7 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages,
>> area = find_vm_area((void *)addr);
>> if (!area ||
>> end > (unsigned long)kasan_reset_tag(area->addr) + area->size ||
>> - !(area->flags & VM_ALLOC))
>> + ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC))
>> return -EINVAL;
>
> I happened to find this patch when I was looking into fixing the "splitting
> is never needed" comment to reflect the latest change with BBML2_NOABORT, and
> tried to relax this restriction. I agree with the justification for this
> patch to make the code more robust for permission updates on a partial range.
> But the following linear mapping permission update code still seems to assume
> that a partial range update never happens:
>
> for (i = 0; i < area->nr_pages; i++) {
>
> It iterates over all pages of this vm area, starting from the first page, and
> updates their permissions. So I think we should do the below to make it more
> robust against partial range updates, like this patch did:
Ahh so the issue is that [addr, addr + numpages * PAGE_SIZE) may only cover a
portion of the vm area? But the current code updates the permissions for the
whole vm area? Ouch...
>
> --- a/arch/arm64/mm/pageattr.c
> +++ b/arch/arm64/mm/pageattr.c
> @@ -185,8 +185,9 @@ static int change_memory_common(unsigned long addr, int numpages,
>           */
>          if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
>                              pgprot_val(clear_mask) == PTE_RDONLY)) {
> -                for (i = 0; i < area->nr_pages; i++) {
> -                        __change_memory_common((u64)page_address(area->pages[i]),
> +                unsigned long idx = (start - (unsigned long)area->addr) >> PAGE_SHIFT;
> +                for (i = 0; i < numpages; i++) {
> +                        __change_memory_common((u64)page_address(area->pages[idx++]),
>                                                 PAGE_SIZE, set_mask, clear_mask);
>                  }
>          }
>
> Just build tested. Does it look reasonable?
Yes that looks correct to me! Will you submit a patch?
Thanks,
Ryan
>
> Thanks,
> Yang
>
>
>> if (!numpages)
>
On 10/10/25 2:52 AM, Ryan Roberts wrote:
> Hi Yang,
>
>
> On 09/10/2025 21:26, Yang Shi wrote:
>>
>> On 3/27/25 11:21 PM, Dev Jain wrote:
>>> arm64 uses apply_to_page_range to change permissions for kernel VA mappings,
>>> which does not support changing permissions for leaf mappings. This function
>>> will change permissions until it encounters a leaf mapping, and will bail
>>> out. To avoid this partial change, explicitly disallow changing permissions
>>> for VM_ALLOW_HUGE_VMAP mappings.
>>>
>>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>>> ---
>>> arch/arm64/mm/pageattr.c | 4 ++--
>>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
>>> index 39fd1f7ff02a..8337c88eec69 100644
>>> --- a/arch/arm64/mm/pageattr.c
>>> +++ b/arch/arm64/mm/pageattr.c
>>> @@ -96,7 +96,7 @@ static int change_memory_common(unsigned long addr, int numpages,
>>> * we are operating on does not result in such splitting.
>>> *
>>> * Let's restrict ourselves to mappings created by vmalloc (or vmap).
>>> - * Those are guaranteed to consist entirely of page mappings, and
>>> + * Disallow VM_ALLOW_HUGE_VMAP vmalloc mappings so that
>>> * splitting is never needed.
>>> *
>>> * So check whether the [addr, addr + size) interval is entirely
>>> @@ -105,7 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages,
>>> area = find_vm_area((void *)addr);
>>> if (!area ||
>>> end > (unsigned long)kasan_reset_tag(area->addr) + area->size ||
>>> - !(area->flags & VM_ALLOC))
>>> + ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC))
>>> return -EINVAL;
>> I happened to find this patch when I was looking into fixing the "splitting
>> is never needed" comment to reflect the latest change with BBML2_NOABORT, and
>> tried to relax this restriction. I agree with the justification for this
>> patch to make the code more robust for permission updates on a partial range.
>> But the following linear mapping permission update code still seems to assume
>> that a partial range update never happens:
>>
>> for (i = 0; i < area->nr_pages; i++) {
>>
>> It iterates over all pages of this vm area, starting from the first page, and
>> updates their permissions. So I think we should do the below to make it more
>> robust against partial range updates, like this patch did:
> Ahh so the issue is that [addr, addr + numpages * PAGE_SIZE) may only cover a
> portion of the vm area? But the current code updates the permissions for the
> whole vm area? Ouch...
Yes. I didn't see anyone actually doing a partial range update, as the
earlier discussion said, but this is another "footgun waiting to go off"
too. We'd better get aligned with this patch.
>
>> --- a/arch/arm64/mm/pageattr.c
>> +++ b/arch/arm64/mm/pageattr.c
>> @@ -185,8 +185,9 @@ static int change_memory_common(unsigned long addr, int numpages,
>>           */
>>          if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
>>                              pgprot_val(clear_mask) == PTE_RDONLY)) {
>> -                for (i = 0; i < area->nr_pages; i++) {
>> -                        __change_memory_common((u64)page_address(area->pages[i]),
>> +                unsigned long idx = (start - (unsigned long)area->addr) >> PAGE_SHIFT;
>> +                for (i = 0; i < numpages; i++) {
>> +                        __change_memory_common((u64)page_address(area->pages[idx++]),
>>                                                 PAGE_SIZE, set_mask, clear_mask);
>>                  }
>>          }
>>
>> Just build tested. Does it look reasonable?
> Yes that looks correct to me! Will you submit a patch?
Yes. I will prepare the patches once -rc1 is available.
Thanks,
Yang
>
> Thanks,
> Ryan
>
>> Thanks,
>> Yang
>>
>>
>>> if (!numpages)
On Fri, Mar 28, 2025 at 11:51:03AM +0530, Dev Jain wrote:
> arm64 uses apply_to_page_range to change permissions for kernel VA mappings,
for vmalloc mappings ^
arm64 does not allow changing permissions to any VA address right now.
> which does not support changing permissions for leaf mappings. This function
> will change permissions until it encounters a leaf mapping, and will bail
> out. To avoid this partial change, explicitly disallow changing permissions
> for VM_ALLOW_HUGE_VMAP mappings.
>
> Signed-off-by: Dev Jain <dev.jain@arm.com>
> ---
> arch/arm64/mm/pageattr.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
> index 39fd1f7ff02a..8337c88eec69 100644
> --- a/arch/arm64/mm/pageattr.c
> +++ b/arch/arm64/mm/pageattr.c
> @@ -96,7 +96,7 @@ static int change_memory_common(unsigned long addr, int numpages,
> * we are operating on does not result in such splitting.
> *
> * Let's restrict ourselves to mappings created by vmalloc (or vmap).
> - * Those are guaranteed to consist entirely of page mappings, and
> + * Disallow VM_ALLOW_HUGE_VMAP vmalloc mappings so that
I'd keep mention of page mappings in the comment, e.g
* Disallow VM_ALLOW_HUGE_VMAP mappings to guarantee that only page
* mappings are updated and splitting is never needed.
With this and changelog updates Ryan asked for
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> * splitting is never needed.
> *
> * So check whether the [addr, addr + size) interval is entirely
> @@ -105,7 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages,
> area = find_vm_area((void *)addr);
> if (!area ||
> end > (unsigned long)kasan_reset_tag(area->addr) + area->size ||
> - !(area->flags & VM_ALLOC))
> + ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC))
> return -EINVAL;
>
> if (!numpages)
> --
> 2.30.2
>
--
Sincerely yours,
Mike.
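
For reference, a respin of the hunk with Mike's suggested wording folded in
might read as follows (an illustrative sketch, not a posted patch):

--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ ... @@ static int change_memory_common(unsigned long addr, int numpages,
          * Let's restrict ourselves to mappings created by vmalloc (or vmap).
-         * Those are guaranteed to consist entirely of page mappings, and
-         * splitting is never needed.
+         * Disallow VM_ALLOW_HUGE_VMAP mappings to guarantee that only page
+         * mappings are updated and splitting is never needed.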
On 29/03/25 4:20 am, Mike Rapoport wrote:
> On Fri, Mar 28, 2025 at 11:51:03AM +0530, Dev Jain wrote:
>> arm64 uses apply_to_page_range to change permissions for kernel VA mappings,
>
> for vmalloc mappings ^
>
> arm64 does not allow changing permissions to any VA address right now.

Sorry, mistakenly used them interchangeably. I'll fix this.

>> [...]
>> - * Those are guaranteed to consist entirely of page mappings, and
>> + * Disallow VM_ALLOW_HUGE_VMAP vmalloc mappings so that
>
> I'd keep mention of page mappings in the comment, e.g
>
> * Disallow VM_ALLOW_HUGE_VMAP mappings to guarantee that only page
> * mappings are updated and splitting is never needed.
>
> With this and changelog updates Ryan asked for
>
> Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

Thanks!

>> [...]
On 28/03/2025 18:50, Mike Rapoport wrote:
> On Fri, Mar 28, 2025 at 11:51:03AM +0530, Dev Jain wrote:
>> arm64 uses apply_to_page_range to change permissions for kernel VA mappings,
>
> for vmalloc mappings ^
>
> arm64 does not allow changing permissions to any VA address right now.
>
>> which does not support changing permissions for leaf mappings. This function
>> will change permissions until it encounters a leaf mapping, and will bail
>> out. To avoid this partial change, explicitly disallow changing permissions
>> for VM_ALLOW_HUGE_VMAP mappings.
>>
>> Signed-off-by: Dev Jain <dev.jain@arm.com>

I wonder if we want a Fixes: tag here? It's certainly a *latent* bug.

> [...]
On Sat, Mar 29, 2025 at 09:46:56AM +0000, Ryan Roberts wrote:
> On 28/03/2025 18:50, Mike Rapoport wrote:
> > On Fri, Mar 28, 2025 at 11:51:03AM +0530, Dev Jain wrote:
> >> arm64 uses apply_to_page_range to change permissions for kernel VA mappings,
> >> [...]
> >> Signed-off-by: Dev Jain <dev.jain@arm.com>
>
> I wonder if we want a Fixes: tag here? It's certainly a *latent* bug.

We have only a few places that use vmalloc_huge() or VM_ALLOW_HUGE_VMAP and
if there was a code that plays permission games on these allocations, x86
set_memory would blow up immediately, so I don't think Fixes: is needed
here.

> [...]

--
Sincerely yours,
Mike.
On 30/03/2025 03:32, Mike Rapoport wrote:
> On Sat, Mar 29, 2025 at 09:46:56AM +0000, Ryan Roberts wrote:
>> On 28/03/2025 18:50, Mike Rapoport wrote:
>>> [...]
>>
>> I wonder if we want a Fixes: tag here? It's certainly a *latent* bug.
>
> We have only a few places that use vmalloc_huge() or VM_ALLOW_HUGE_VMAP and
> if there was a code that plays permission games on these allocations, x86
> set_memory would blow up immediately, so I don't think Fixes: is needed
> here.

Hi Mike,

I think I may have misunderstood your comments when we spoke at LSF/MM the
other day, as this statement seems to contradict. I thought you said that on
x86 BPF allocates memory using vmalloc_huge()/VM_ALLOW_HUGE_VMAP and then its
sub-allocator will set_memory_*() on a sub-region of that allocation? (And we
then agreed that it would be good for arm64 to eventually support this with
BBML2).

Anyway, regardless, I think this change is a useful first step to improving
vmalloc as it makes us more defensive against any future attempt to change
permissions on a huge allocation. In the long term I'd like to get to the
point where arm64 (with BBML2) can map with VM_ALLOW_HUGE_VMAP by default.

Thanks,
Ryan

> [...]
Hi Ryan,

On Tue, Apr 01, 2025 at 10:43:01AM +0100, Ryan Roberts wrote:
> On 30/03/2025 03:32, Mike Rapoport wrote:
> > [...]
> > We have only a few places that use vmalloc_huge() or VM_ALLOW_HUGE_VMAP and
> > if there was a code that plays permission games on these allocations, x86
> > set_memory would blow up immediately, so I don't think Fixes: is needed
> > here.
>
> I think I may have misunderstood your comments when we spoke at LSF/MM the
> other day, as this statement seems to contradict. I thought you said that on
> x86 BPF allocates memory using vmalloc_huge()/VM_ALLOW_HUGE_VMAP and then its
> sub-allocator will set_memory_*() on a sub-region of that allocation? (And we
> then agreed that it would be good for arm64 to eventually support this with
> BBML2).

I misremembered :)
They do allocate several PMD_SIZE chunks at once, but they don't use
vmalloc_huge(), so everything there is mapped with base pages.

And now they are using execmem rather than vmalloc directly, and execmem
doesn't use VM_ALLOW_HUGE_VMAP anywhere except modules on x86.

> Anyway, regardless, I think this change is a useful first step to improving
> vmalloc as it makes us more defensive against any future attempt to change
> permissions on a huge allocation. In the long term I'd like to get to the
> point where arm64 (with BBML2) can map with VM_ALLOW_HUGE_VMAP by default.

--
Sincerely yours,
Mike.
On 01/04/2025 06:12, Mike Rapoport wrote:
> Hi Ryan,
>
> On Tue, Apr 01, 2025 at 10:43:01AM +0100, Ryan Roberts wrote:
>> [...]
>> I thought you said that on x86 BPF allocates memory using
>> vmalloc_huge()/VM_ALLOW_HUGE_VMAP and then its sub-allocator will
>> set_memory_*() on a sub-region of that allocation?
>
> I misremembered :)
> They do allocate several PMD_SIZE chunks at once, but they don't use
> vmalloc_huge(), so everything there is mapped with base pages.
>
> And now they are using execmem rather than vmalloc directly, and execmem
> doesn't use VM_ALLOW_HUGE_VMAP anywhere except modules on x86.

Ahh OK! Like I said, I don't think it reduces the value of this patch though.

>> [...]
On 30/03/25 1:02 pm, Mike Rapoport wrote:
> On Sat, Mar 29, 2025 at 09:46:56AM +0000, Ryan Roberts wrote:
>> [...]
>> I wonder if we want a Fixes: tag here? It's certainly a *latent* bug.
>
> We have only a few places that use vmalloc_huge() or VM_ALLOW_HUGE_VMAP and
> if there was a code that plays permission games on these allocations, x86
> set_memory would blow up immediately, so I don't think Fixes: is needed
> here.

But I think x86 can handle this (split_large_page() in __change_page_attr())?

> [...]
On Sun, Mar 30, 2025 at 01:53:57PM +0530, Dev Jain wrote:
> On 30/03/25 1:02 pm, Mike Rapoport wrote:
> > [...]
> > We have only a few places that use vmalloc_huge() or VM_ALLOW_HUGE_VMAP and
> > if there was a code that plays permission games on these allocations, x86
> > set_memory would blow up immediately, so I don't think Fixes: is needed
> > here.
>
> But I think x86 can handle this (split_large_page() in __change_page_attr())?

Yes, but it also updates corresponding direct map entries when vmalloc
permissions change and the direct map update presumes physical contiguity
of the range.

> [...]

--
Sincerely yours,
Mike.
On 29/03/25 3:16 pm, Ryan Roberts wrote:
> On 28/03/2025 18:50, Mike Rapoport wrote:
>> [...]
>>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>
> I wonder if we want a Fixes: tag here? It's certainly a *latent* bug.

I am struggling to find the commit to which we want to backport. Should it
be e920722 (arm64: support huge vmalloc mappings)?

> [...]
On 28/03/2025 02:21, Dev Jain wrote:
> arm64 uses apply_to_page_range to change permissions for kernel VA mappings,
> which does not support changing permissions for leaf mappings. This function

I think you mean "block" mappings here? A leaf mapping refers to a page table
entry that maps a piece of memory at any level in the pgtable (i.e. a present
entry that does not map a table).

A block mapping is an Arm ARM term used to mean a leaf mapping at a level other
than the last level (e.g. pmd, pud). A page mapping is an Arm ARM term used to
mean a leaf mapping at the last level (e.g. pte).

> will change permissions until it encounters a leaf mapping, and will bail

block mapping

> out. To avoid this partial change, explicitly disallow changing permissions
> for VM_ALLOW_HUGE_VMAP mappings.

It will also emit a warning. Since there are no reports of this triggering, it
implies that there are currently no cases of code doing a vmalloc_huge()
followed by partial permission change, at least on arm64 (I'm told BPF does do
this on x86 though). But this is a footgun waiting to go off, so let's detect it
early and avoid the possibility of permissions in an intermediate state. (It
might be worth wordsmithing this into the commit log).

>
> Signed-off-by: Dev Jain <dev.jain@arm.com>

With the commit log fixed up:

Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>

> [...]
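
The partial-change failure mode Ryan describes is easy to model outside the
kernel. A toy sketch (plain C, not kernel code) of a permission walker that
cannot split block mappings:

#include <stdbool.h>
#include <stdio.h>

struct entry {
        bool is_block;      /* covered by a block (huge) mapping */
        unsigned int prot;  /* pretend permission value */
};

/* Applies new_prot entry by entry, but bails at the first block mapping --
 * everything before it has already been changed. */
static int change_range(struct entry *e, int n, unsigned int new_prot)
{
        for (int i = 0; i < n; i++) {
                if (e[i].is_block)
                        return -1;  /* cannot split: bail out mid-range */
                e[i].prot = new_prot;
        }
        return 0;
}

int main(void)
{
        struct entry r[4] = { {false, 1}, {false, 1}, {true, 1}, {false, 1} };
        int ret = change_range(r, 4, 2);

        printf("ret=%d prot=%u,%u,%u,%u\n", ret,
               r[0].prot, r[1].prot, r[2].prot, r[3].prot);
        /* ret=-1 prot=2,2,1,1: the intermediate state the patch guards against */
        return 0;
}

Rejecting VM_ALLOW_HUGE_VMAP areas up front means the walker never starts on
a range it cannot finish.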
On 28/03/25 8:09 pm, Ryan Roberts wrote:
> On 28/03/2025 02:21, Dev Jain wrote:
>> arm64 uses apply_to_page_range to change permissions for kernel VA mappings,
>> which does not support changing permissions for leaf mappings. This function
>
> I think you mean "block" mappings here? A leaf mapping refers to a page table
> entry that maps a piece of memory at any level in the pgtable (i.e. a present
> entry that does not map a table).
>
> A block mapping is an Arm ARM term used to mean a leaf mapping at a level other
> than the last level (e.g. pmd, pud). A page mapping is an Arm ARM term used to
> mean a leaf mapping at the last level (e.g. pte).
>
> [...]
>
> It will also emit a warning. Since there are no reports of this triggering, it
> implies that there are currently no cases of code doing a vmalloc_huge()
> followed by partial permission change, at least on arm64 (I'm told BPF does do
> this on x86 though). But this is a footgun waiting to go off, so let's detect it
> early and avoid the possibility of permissions in an intermediate state. (It
> might be worth wordsmithing this into the commit log).

Thanks, I will add this.

>> Signed-off-by: Dev Jain <dev.jain@arm.com>
>
> With the commit log fixed up:
>
> Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>

Thanks!

> [...]