A fundamental principle of page table type safety is that `pte_t` represents
the lowest level page table entry and should never carry huge page attributes.
Currently, passing a pgprot with huge page bits (e.g., extracted via
pmd_pgprot()) into pfn_pte() creates a malformed PTE that retains the huge
attribute, leading to the necessity of the ugly `pte_clrhuge()` anti-pattern.
Enforce type safety by making `pfn_pte()` inherently filter out huge page
attributes:
- On x86: Strip the `_PAGE_PSE` bit.
- On ARM64: Mask out the block descriptor bits in `PTE_TYPE_MASK` and
enforce the `PTE_TYPE_PAGE` format.
- On RISC-V: No changes required, as RISC-V leaf PMDs and PTEs share the
exact same hardware format and do not use a distinct huge bit.
Signed-off-by: Yin Tirui <yintirui@huawei.com>
---
arch/arm64/include/asm/pgtable.h | 4 +++-
arch/x86/include/asm/pgtable.h | 4 ++++
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b3e58735c49b..f2a7a40106d2 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -141,7 +141,9 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
#define pfn_pte(pfn,prot) \
- __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
+ __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | \
+ ((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
+ (PTE_TYPE_PAGE & ~PTE_VALID)))
#define pte_none(pte) (!pte_val(pte))
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1662c5a8f445..a4dbd81d42bf 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -738,6 +738,10 @@ static inline pgprotval_t check_pgprot(pgprot_t pgprot)
static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+
+ /* Filter out _PAGE_PSE to ensure PTEs never carry the huge page bit */
+ pgprot = __pgprot(pgprot_val(pgprot) & ~_PAGE_PSE);
+
/* This bit combination is used to mark shadow stacks */
WARN_ON_ONCE((pgprot_val(pgprot) & (_PAGE_DIRTY | _PAGE_RW)) ==
_PAGE_DIRTY);
--
2.22.0
On 28.02.26 08:09, Yin Tirui wrote:
> A fundamental principle of page table type safety is that `pte_t` represents
> the lowest level page table entry and should never carry huge page attributes.
>
> Currently, passing a pgprot with huge page bits (e.g., extracted via
> pmd_pgprot()) into pfn_pte() creates a malformed PTE that retains the huge
> attribute, leading to the necessity of the ugly `pte_clrhuge()` anti-pattern.
>
> Enforce type safety by making `pfn_pte()` inherently filter out huge page
> attributes:
> - On x86: Strip the `_PAGE_PSE` bit.
> - On ARM64: Mask out the block descriptor bits in `PTE_TYPE_MASK` and
> enforce the `PTE_TYPE_PAGE` format.
> - On RISC-V: No changes required, as RISC-V leaf PMDs and PTEs share the
> exact same hardware format and do not use a distinct huge bit.
>
> Signed-off-by: Yin Tirui <yintirui@huawei.com>
> ---
> arch/arm64/include/asm/pgtable.h | 4 +++-
> arch/x86/include/asm/pgtable.h | 4 ++++
> 2 files changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index b3e58735c49b..f2a7a40106d2 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -141,7 +141,9 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
>
> #define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
> #define pfn_pte(pfn,prot) \
> - __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
> + __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | \
> + ((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
> + (PTE_TYPE_PAGE & ~PTE_VALID)))
>
> #define pte_none(pte) (!pte_val(pte))
> #define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> index 1662c5a8f445..a4dbd81d42bf 100644
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -738,6 +738,10 @@ static inline pgprotval_t check_pgprot(pgprot_t pgprot)
> static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
> {
> phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
> +
> + /* Filter out _PAGE_PSE to ensure PTEs never carry the huge page bit */
> + pgprot = __pgprot(pgprot_val(pgprot) & ~_PAGE_PSE);
Is it really a good idea to silently drop the bit?
Today it can either be used for a large page (which should be a pmd,
of course), or - much worse - you'd strip the _PAGE_PAT bit, which is
at the same position in PTEs.
So basically you are removing the ability to use some cache modes.
NACK!
Juergen
On 3/4/2026 3:52 PM, Jürgen Groß wrote:
> On 28.02.26 08:09, Yin Tirui wrote:
>> A fundamental principle of page table type safety is that `pte_t`
>> represents
>> the lowest level page table entry and should never carry huge page
>> attributes.
>>
>> Currently, passing a pgprot with huge page bits (e.g., extracted via
>> pmd_pgprot()) into pfn_pte() creates a malformed PTE that retains the
>> huge
>> attribute, leading to the necessity of the ugly `pte_clrhuge()` anti-
>> pattern.
>>
>> Enforce type safety by making `pfn_pte()` inherently filter out huge page
>> attributes:
>> - On x86: Strip the `_PAGE_PSE` bit.
>> - On ARM64: Mask out the block descriptor bits in `PTE_TYPE_MASK` and
>> enforce the `PTE_TYPE_PAGE` format.
>> - On RISC-V: No changes required, as RISC-V leaf PMDs and PTEs share the
>> exact same hardware format and do not use a distinct huge bit.
>>
>> Signed-off-by: Yin Tirui <yintirui@huawei.com>
>> ---
>> arch/arm64/include/asm/pgtable.h | 4 +++-
>> arch/x86/include/asm/pgtable.h | 4 ++++
>> 2 files changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/
>> asm/pgtable.h
>> index b3e58735c49b..f2a7a40106d2 100644
>> --- a/arch/arm64/include/asm/pgtable.h
>> +++ b/arch/arm64/include/asm/pgtable.h
>> @@ -141,7 +141,9 @@ static inline pteval_t
>> __phys_to_pte_val(phys_addr_t phys)
>> #define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
>> #define pfn_pte(pfn,prot) \
>> - __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) |
>> pgprot_val(prot))
>> + __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | \
>> + ((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
>> + (PTE_TYPE_PAGE & ~PTE_VALID)))
>> #define pte_none(pte) (!pte_val(pte))
>> #define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
>> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/
>> pgtable.h
>> index 1662c5a8f445..a4dbd81d42bf 100644
>> --- a/arch/x86/include/asm/pgtable.h
>> +++ b/arch/x86/include/asm/pgtable.h
>> @@ -738,6 +738,10 @@ static inline pgprotval_t check_pgprot(pgprot_t
>> pgprot)
>> static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
>> {
>> phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
>> +
>> + /* Filter out _PAGE_PSE to ensure PTEs never carry the huge page
>> bit */
>> + pgprot = __pgprot(pgprot_val(pgprot) & ~_PAGE_PSE);
>
> Is it really a good idea to silently drop the bit?
>
> Today it can either be used for a large page (which should be a pmd,
> of course), or - much worse - you'd strip the _PAGE_PAT bit, which is
> at the same position in PTEs.
>
> So basically you are removing the ability to use some cache modes.
>
> NACK!
>
>
> Juergen
Hi Willy and Jürgen,
Following up on the x86 _PAGE_PSE and _PAGE_PAT aliasing issue.
To achieve the goal of keeping pfn_pte() pure and completely eradicating
the pte_clrhuge() anti-pattern, we need a way to ensure pfn_pte() never
receives a pgprot with the huge bit set.
@Jürgen:
Just to be absolutely certain: is there any safe way to filter out the
huge page attributes directly inside x86's pfn_pte() without breaking
PAT? Or does the hardware bit-aliasing make this strictly impossible at
the pfn_pte() level?
@Willy @Jürgen:
Assuming it is impossible to filter this safely inside pfn_pte() on x86,
we must translate the pgprot before passing it down. To maintain strict
type-safety and still drop pte_clrhuge(), I plan to introduce two
arch-neutral wrappers:
x86:
/* Translates large prot to 4K. Shifts PAT back to bit 7, inherently
clearing _PAGE_PSE */
#define pgprot_huge_to_pte(prot) pgprot_large_2_4k(prot)
/* Translates 4K prot to large. Shifts PAT to bit 12, strictly sets
_PAGE_PSE */
#define pgprot_pte_to_huge(prot)
__pgprot(pgprot_val(pgprot_4k_2_large(prot)) | _PAGE_PSE)
arm64:
/*
* Drops Block marker, enforces Page marker.
* Strictly preserves the PTE_VALID bit to avoid validating PROT_NONE
pages.
*/
#define pgprot_huge_to_pte(prot) \
__pgprot((pgprot_val(prot) & ~(PMD_TYPE_MASK & ~PTE_VALID)) | \
(PTE_TYPE_PAGE & ~PTE_VALID))
/*
* Drops Page marker, sets Block marker.
* Strictly preserves the PTE_VALID bit.
*/
#define pgprot_pte_to_huge(prot) \
__pgprot((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
(PMD_TYPE_SECT & ~PTE_VALID))
Usage:
1. Creating a huge pfnmap (remap_try_huge_pmd)
pgprot_t huge_prot = pgprot_pte_to_huge(prot);
/* No need for pmd_mkhuge() */
pmd_t entry = pmd_mkspecial(pfn_pmd(pfn, huge_prot));
set_pmd_at(mm, addr, pmd, entry);
2. Splitting a huge pfnmap (__split_huge_pmd_locked)
pgprot_t small_prot = pgprot_huge_to_pte(pmd_pgprot(old_pmd));
/* No need for pte_clrhuge() */
pte_t entry = pfn_pte(pmd_pfn(old_pmd), small_prot);
set_ptes(mm, haddr, pte, entry, HPAGE_PMD_NR);
Willy, is there a better architectural approach to handle this and
satisfy the type-safety requirement given the x86 hardware constraints?
--
Thanks,
Yin Tirui
On Thu, Mar 05, 2026 at 05:38:46PM +0800, Yin Tirui wrote: > On 3/4/2026 3:52 PM, Jürgen Groß wrote: > > Today it can either be used for a large page (which should be a pmd, > > of course), or - much worse - you'd strip the _PAGE_PAT bit, which is > > at the same position in PTEs. > > > > So basically you are removing the ability to use some cache modes. > > > > NACK! > > > > > > Juergen > > Hi Willy and Jürgen, > > Following up on the x86 _PAGE_PSE and _PAGE_PAT aliasing issue. > > To achieve the goal of keeping pfn_pte() pure and completely eradicating the > pte_clrhuge() anti-pattern, we need a way to ensure pfn_pte() never receives > a pgprot with the huge bit set. > > @Jürgen: > Just to be absolutely certain: is there any safe way to filter out the huge > page attributes directly inside x86's pfn_pte() without breaking PAT? Or > does the hardware bit-aliasing make this strictly impossible at the > pfn_pte() level? > > @Willy @Jürgen: > Assuming it is impossible to filter this safely inside pfn_pte() on x86, we > must translate the pgprot before passing it down. To maintain strict > type-safety and still drop pte_clrhuge(), I plan to introduce two > arch-neutral wrappers: > > x86: > /* Translates large prot to 4K. Shifts PAT back to bit 7, inherently > clearing _PAGE_PSE */ > #define pgprot_huge_to_pte(prot) pgprot_large_2_4k(prot) > /* Translates 4K prot to large. Shifts PAT to bit 12, strictly sets > _PAGE_PSE */ > #define pgprot_pte_to_huge(prot) > __pgprot(pgprot_val(pgprot_4k_2_large(prot)) | _PAGE_PSE) I don't think we should have pgprot_large_2_4k(). Or rather, I think it should be embedded in pmd_pgprot() / pud_pgprot(). That is, we should have an 'ideal' pgprot which, on x86, perhaps matches that used by the 4k level. pfn_pmd() should be converting from the ideal pgprot to that actually used by PMDs (and setting _PAGE_PSE?) > arm64: > /* > * Drops Block marker, enforces Page marker. 
> * Strictly preserves the PTE_VALID bit to avoid validating PROT_NONE pages. > */ > #define pgprot_huge_to_pte(prot) \ > __pgprot((pgprot_val(prot) & ~(PMD_TYPE_MASK & ~PTE_VALID)) | \ > (PTE_TYPE_PAGE & ~PTE_VALID)) > /* > * Drops Page marker, sets Block marker. > * Strictly preserves the PTE_VALID bit. > */ > #define pgprot_pte_to_huge(prot) \ > __pgprot((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \ > (PMD_TYPE_SECT & ~PTE_VALID)) > > Usage: > 1. Creating a huge pfnmap (remap_try_huge_pmd) > pgprot_t huge_prot = pgprot_pte_to_huge(prot); > > /* No need for pmd_mkhuge() */ > pmd_t entry = pmd_mkspecial(pfn_pmd(pfn, huge_prot)); > set_pmd_at(mm, addr, pmd, entry); > > 2. Splitting a huge pfnmap (__split_huge_pmd_locked) > pgprot_t small_prot = pgprot_huge_to_pte(pmd_pgprot(old_pmd)); > > /* No need for pte_clrhuge() */ > pte_t entry = pfn_pte(pmd_pfn(old_pmd), small_prot); > set_ptes(mm, haddr, pte, entry, HPAGE_PMD_NR); > > > Willy, is there a better architectural approach to handle this and satisfy > the type-safety requirement given the x86 hardware constraints? > > -- > Thanks, > Yin Tirui > >
On 3/6/2026 12:25 PM, Matthew Wilcox wrote: > > I don't think we should have pgprot_large_2_4k(). Or rather, I think it > should be embedded in pmd_pgprot() / pud_pgprot(). That is, we should > have an 'ideal' pgprot which, on x86, perhaps matches that used by the > 4k level. pfn_pmd() should be converting from the ideal pgprot to > that actually used by PMDs (and setting _PAGE_PSE?) > Hi Willy, I will take this route and implement the embedded approach for the v4 respin. -- Yin Tirui
On 05.03.26 10:38, Yin Tirui wrote:
>
> On 3/4/2026 3:52 PM, Jürgen Groß wrote:
>> On 28.02.26 08:09, Yin Tirui wrote:
>>> A fundamental principle of page table type safety is that `pte_t` represents
>>> the lowest level page table entry and should never carry huge page attributes.
>>>
>>> Currently, passing a pgprot with huge page bits (e.g., extracted via
>>> pmd_pgprot()) into pfn_pte() creates a malformed PTE that retains the huge
>> attribute, leading to the necessity of the ugly `pte_clrhuge()` anti-pattern.
>>>
>>> Enforce type safety by making `pfn_pte()` inherently filter out huge page
>>> attributes:
>>> - On x86: Strip the `_PAGE_PSE` bit.
>>> - On ARM64: Mask out the block descriptor bits in `PTE_TYPE_MASK` and
>>> enforce the `PTE_TYPE_PAGE` format.
>>> - On RISC-V: No changes required, as RISC-V leaf PMDs and PTEs share the
>>> exact same hardware format and do not use a distinct huge bit.
>>>
>>> Signed-off-by: Yin Tirui <yintirui@huawei.com>
>>> ---
>>> arch/arm64/include/asm/pgtable.h | 4 +++-
>>> arch/x86/include/asm/pgtable.h | 4 ++++
>>> 2 files changed, 7 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/ asm/
>>> pgtable.h
>>> index b3e58735c49b..f2a7a40106d2 100644
>>> --- a/arch/arm64/include/asm/pgtable.h
>>> +++ b/arch/arm64/include/asm/pgtable.h
>>> @@ -141,7 +141,9 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
>>> #define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
>>> #define pfn_pte(pfn,prot) \
>>> - __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) |
>>> pgprot_val(prot))
>>> + __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | \
>>> + ((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
>>> + (PTE_TYPE_PAGE & ~PTE_VALID)))
>>> #define pte_none(pte) (!pte_val(pte))
>>> #define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
>>> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/ pgtable.h
>>> index 1662c5a8f445..a4dbd81d42bf 100644
>>> --- a/arch/x86/include/asm/pgtable.h
>>> +++ b/arch/x86/include/asm/pgtable.h
>>> @@ -738,6 +738,10 @@ static inline pgprotval_t check_pgprot(pgprot_t pgprot)
>>> static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
>>> {
>>> phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
>>> +
>>> + /* Filter out _PAGE_PSE to ensure PTEs never carry the huge page bit */
>>> + pgprot = __pgprot(pgprot_val(pgprot) & ~_PAGE_PSE);
>>
>> Is it really a good idea to silently drop the bit?
>>
>> Today it can either be used for a large page (which should be a pmd,
>> of course), or - much worse - you'd strip the _PAGE_PAT bit, which is
>> at the same position in PTEs.
>>
>> So basically you are removing the ability to use some cache modes.
>>
>> NACK!
>>
>>
>> Juergen
>
> Hi Willy and Jürgen,
>
> Following up on the x86 _PAGE_PSE and _PAGE_PAT aliasing issue.
>
> To achieve the goal of keeping pfn_pte() pure and completely eradicating the
> pte_clrhuge() anti-pattern, we need a way to ensure pfn_pte() never receives a
> pgprot with the huge bit set.
>
> @Jürgen:
> Just to be absolutely certain: is there any safe way to filter out the huge page
> attributes directly inside x86's pfn_pte() without breaking PAT? Or does the
> hardware bit-aliasing make this strictly impossible at the pfn_pte() level?
There is no huge bit at the PTE level. It is existing only at the PMD and the
PUD level.
So: yes, it is absolutely impossible to filter it out, as the bit has a
different meaning in "real" PTEs (with "PTE" having the meaning: a translation
entry in a page referenced by a PMD entry not having the PSE bit set).
>
> @Willy @Jürgen:
> Assuming it is impossible to filter this safely inside pfn_pte() on x86, we must
> translate the pgprot before passing it down. To maintain strict type-safety and
> still drop pte_clrhuge(), I plan to introduce two arch-neutral wrappers:
>
> x86:
> /* Translates large prot to 4K. Shifts PAT back to bit 7, inherently clearing
> _PAGE_PSE */
> #define pgprot_huge_to_pte(prot) pgprot_large_2_4k(prot)
> /* Translates 4K prot to large. Shifts PAT to bit 12, strictly sets _PAGE_PSE */
> #define pgprot_pte_to_huge(prot) __pgprot(pgprot_val(pgprot_4k_2_large(prot)) |
> _PAGE_PSE)
Seems to be okay.
Juergen
On 3/5/2026 6:05 PM, Jürgen Groß wrote: >> Hi Willy and Jürgen, >> >> Following up on the x86 _PAGE_PSE and _PAGE_PAT aliasing issue. >> >> To achieve the goal of keeping pfn_pte() pure and completely >> eradicating the pte_clrhuge() anti-pattern, we need a way to ensure >> pfn_pte() never receives a pgprot with the huge bit set. >> >> @Jürgen: >> Just to be absolutely certain: is there any safe way to filter out the >> huge page attributes directly inside x86's pfn_pte() without breaking >> PAT? Or does the hardware bit-aliasing make this strictly impossible >> at the pfn_pte() level? > > There is no huge bit at the PTE level. It is existing only at the PMD > and the > PUD level. > > So: yes, it is absolutely impossible to filter it out, as the bit has a > different meaning in "real" PTEs (with "PTE" having the meaning: a > translation > entry in a page referenced by a PMD entry not having the PSE bit set). > Hi Jürgen, Thank you for your confirmation. >> >> @Willy @Jürgen: >> Assuming it is impossible to filter this safely inside pfn_pte() on >> x86, we must translate the pgprot before passing it down. To maintain >> strict type-safety and still drop pte_clrhuge(), I plan to introduce >> two arch-neutral wrappers: >> >> x86: >> /* Translates large prot to 4K. Shifts PAT back to bit 7, inherently >> clearing _PAGE_PSE */ >> #define pgprot_huge_to_pte(prot) pgprot_large_2_4k(prot) >> /* Translates 4K prot to large. Shifts PAT to bit 12, strictly sets >> _PAGE_PSE */ >> #define pgprot_pte_to_huge(prot) >> __pgprot(pgprot_val(pgprot_4k_2_large(prot)) | _PAGE_PSE) > > Seems to be okay. While the wrapper approach handles the aliasing, Willy recently suggested taking it a step further by embedding this translation directly into `pfn_pmd()` and `pmd_pgprot()`. I am going to explore this embedded approach for the v4 respin. -- Yin Tirui
On 3/4/2026 3:52 PM, Jürgen Groß wrote:
> On 28.02.26 08:09, Yin Tirui wrote:
>> A fundamental principle of page table type safety is that `pte_t`
>> represents
>> the lowest level page table entry and should never carry huge page
>> attributes.
>>
>> Currently, passing a pgprot with huge page bits (e.g., extracted via
>> pmd_pgprot()) into pfn_pte() creates a malformed PTE that retains the
>> huge
>> attribute, leading to the necessity of the ugly `pte_clrhuge()` anti-
>> pattern.
>>
>> Enforce type safety by making `pfn_pte()` inherently filter out huge page
>> attributes:
>> - On x86: Strip the `_PAGE_PSE` bit.
>> - On ARM64: Mask out the block descriptor bits in `PTE_TYPE_MASK` and
>> enforce the `PTE_TYPE_PAGE` format.
>> - On RISC-V: No changes required, as RISC-V leaf PMDs and PTEs share the
>> exact same hardware format and do not use a distinct huge bit.
>>
>> Signed-off-by: Yin Tirui <yintirui@huawei.com>
>> ---
>> arch/arm64/include/asm/pgtable.h | 4 +++-
>> arch/x86/include/asm/pgtable.h | 4 ++++
>> 2 files changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/
>> asm/pgtable.h
>> index b3e58735c49b..f2a7a40106d2 100644
>> --- a/arch/arm64/include/asm/pgtable.h
>> +++ b/arch/arm64/include/asm/pgtable.h
>> @@ -141,7 +141,9 @@ static inline pteval_t
>> __phys_to_pte_val(phys_addr_t phys)
>> #define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
>> #define pfn_pte(pfn,prot) \
>> - __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) |
>> pgprot_val(prot))
>> + __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | \
>> + ((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
>> + (PTE_TYPE_PAGE & ~PTE_VALID)))
>> #define pte_none(pte) (!pte_val(pte))
>> #define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
>> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/
>> pgtable.h
>> index 1662c5a8f445..a4dbd81d42bf 100644
>> --- a/arch/x86/include/asm/pgtable.h
>> +++ b/arch/x86/include/asm/pgtable.h
>> @@ -738,6 +738,10 @@ static inline pgprotval_t check_pgprot(pgprot_t
>> pgprot)
>> static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
>> {
>> phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
>> +
>> + /* Filter out _PAGE_PSE to ensure PTEs never carry the huge page
>> bit */
>> + pgprot = __pgprot(pgprot_val(pgprot) & ~_PAGE_PSE);
>
> Is it really a good idea to silently drop the bit?
>
> Today it can either be used for a large page (which should be a pmd,
> of course), or - much worse - you'd strip the _PAGE_PAT bit, which is
> at the same position in PTEs.
>
> So basically you are removing the ability to use some cache modes.
>
> NACK!
>
>
> Juergen
Hi Jürgen,
You are absolutely right. I missed the fact that `_PAGE_PSE` aliases
with `_PAGE_PAT` on 4K PTEs.
The intention here was to follow previous feedback to enforce type
safety by filtering out huge page attributes directly inside
`pfn_pte()`. However, doing it this way obviously breaks the cache modes
on x86.
I agree with the NACK. I will drop this approach and rethink how to
handle the huge-to-normal pgprot conversion safely for v4.
--
Thanks,
Yin Tirui
© 2016 - 2026 Red Hat, Inc.