[PATCH RFC v3 2/4] mm/pgtable: Make pfn_pte() filter out huge page attributes

Yin Tirui posted 4 patches 1 month ago
[PATCH RFC v3 2/4] mm/pgtable: Make pfn_pte() filter out huge page attributes
Posted by Yin Tirui 1 month ago
A fundamental principle of page table type safety is that `pte_t` represents
the lowest level page table entry and should never carry huge page attributes.

Currently, passing a pgprot with huge page bits (e.g., extracted via
pmd_pgprot()) into pfn_pte() creates a malformed PTE that retains the huge
attribute, leading to the necessity of the ugly `pte_clrhuge()` anti-pattern.

Enforce type safety by making `pfn_pte()` inherently filter out huge page
attributes:
- On x86: Strip the `_PAGE_PSE` bit.
- On ARM64: Mask out the block descriptor bits in `PTE_TYPE_MASK` and
  enforce the `PTE_TYPE_PAGE` format.
- On RISC-V: No changes required, as RISC-V leaf PMDs and PTEs share the
  exact same hardware format and do not use a distinct huge bit.

Signed-off-by: Yin Tirui <yintirui@huawei.com>
---
 arch/arm64/include/asm/pgtable.h | 4 +++-
 arch/x86/include/asm/pgtable.h   | 4 ++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b3e58735c49b..f2a7a40106d2 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -141,7 +141,9 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
 
 #define pte_pfn(pte)		(__pte_to_phys(pte) >> PAGE_SHIFT)
 #define pfn_pte(pfn,prot)	\
-	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
+	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | \
+		((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
+		(PTE_TYPE_PAGE & ~PTE_VALID)))
 
 #define pte_none(pte)		(!pte_val(pte))
 #define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1662c5a8f445..a4dbd81d42bf 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -738,6 +738,10 @@ static inline pgprotval_t check_pgprot(pgprot_t pgprot)
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
 	phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
+
+	/* Filter out _PAGE_PSE to ensure PTEs never carry the huge page bit */
+	pgprot = __pgprot(pgprot_val(pgprot) & ~_PAGE_PSE);
+
 	/* This bit combination is used to mark shadow stacks */
 	WARN_ON_ONCE((pgprot_val(pgprot) & (_PAGE_DIRTY | _PAGE_RW)) ==
 			_PAGE_DIRTY);
-- 
2.22.0
Re: [PATCH RFC v3 2/4] mm/pgtable: Make pfn_pte() filter out huge page attributes
Posted by Jürgen Groß 4 weeks, 1 day ago
On 28.02.26 08:09, Yin Tirui wrote:
> A fundamental principle of page table type safety is that `pte_t` represents
> the lowest level page table entry and should never carry huge page attributes.
> 
> Currently, passing a pgprot with huge page bits (e.g., extracted via
> pmd_pgprot()) into pfn_pte() creates a malformed PTE that retains the huge
> attribute, leading to the necessity of the ugly `pte_clrhuge()` anti-pattern.
> 
> Enforce type safety by making `pfn_pte()` inherently filter out huge page
> attributes:
> - On x86: Strip the `_PAGE_PSE` bit.
> - On ARM64: Mask out the block descriptor bits in `PTE_TYPE_MASK` and
>    enforce the `PTE_TYPE_PAGE` format.
> - On RISC-V: No changes required, as RISC-V leaf PMDs and PTEs share the
>    exact same hardware format and do not use a distinct huge bit.
> 
> Signed-off-by: Yin Tirui <yintirui@huawei.com>
> ---
>   arch/arm64/include/asm/pgtable.h | 4 +++-
>   arch/x86/include/asm/pgtable.h   | 4 ++++
>   2 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index b3e58735c49b..f2a7a40106d2 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -141,7 +141,9 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
>   
>   #define pte_pfn(pte)		(__pte_to_phys(pte) >> PAGE_SHIFT)
>   #define pfn_pte(pfn,prot)	\
> -	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
> +	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | \
> +		((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
> +		(PTE_TYPE_PAGE & ~PTE_VALID)))
>   
>   #define pte_none(pte)		(!pte_val(pte))
>   #define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> index 1662c5a8f445..a4dbd81d42bf 100644
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -738,6 +738,10 @@ static inline pgprotval_t check_pgprot(pgprot_t pgprot)
>   static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
>   {
>   	phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
> +
> +	/* Filter out _PAGE_PSE to ensure PTEs never carry the huge page bit */
> +	pgprot = __pgprot(pgprot_val(pgprot) & ~_PAGE_PSE);

Is it really a good idea to silently drop the bit?

Today it can either be used for a large page (which should be a pmd,
of course), or - much worse - you'd strip the _PAGE_PAT bit, which is
at the same position in PTEs.

So basically you are removing the ability to use some cache modes.

NACK!


Juergen
Re: [PATCH RFC v3 2/4] mm/pgtable: Make pfn_pte() filter out huge page attributes
Posted by Yin Tirui 4 weeks ago
On 3/4/2026 3:52 PM, Jürgen Groß wrote:
> On 28.02.26 08:09, Yin Tirui wrote:
>> A fundamental principle of page table type safety is that `pte_t` 
>> represents
>> the lowest level page table entry and should never carry huge page 
>> attributes.
>>
>> Currently, passing a pgprot with huge page bits (e.g., extracted via
>> pmd_pgprot()) into pfn_pte() creates a malformed PTE that retains the 
>> huge
>> attribute, leading to the necessity of the ugly `pte_clrhuge()` anti- 
>> pattern.
>>
>> Enforce type safety by making `pfn_pte()` inherently filter out huge page
>> attributes:
>> - On x86: Strip the `_PAGE_PSE` bit.
>> - On ARM64: Mask out the block descriptor bits in `PTE_TYPE_MASK` and
>>    enforce the `PTE_TYPE_PAGE` format.
>> - On RISC-V: No changes required, as RISC-V leaf PMDs and PTEs share the
>>    exact same hardware format and do not use a distinct huge bit.
>>
>> Signed-off-by: Yin Tirui <yintirui@huawei.com>
>> ---
>>   arch/arm64/include/asm/pgtable.h | 4 +++-
>>   arch/x86/include/asm/pgtable.h   | 4 ++++
>>   2 files changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/ 
>> asm/pgtable.h
>> index b3e58735c49b..f2a7a40106d2 100644
>> --- a/arch/arm64/include/asm/pgtable.h
>> +++ b/arch/arm64/include/asm/pgtable.h
>> @@ -141,7 +141,9 @@ static inline pteval_t 
>> __phys_to_pte_val(phys_addr_t phys)
>>   #define pte_pfn(pte)        (__pte_to_phys(pte) >> PAGE_SHIFT)
>>   #define pfn_pte(pfn,prot)    \
>> -    __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | 
>> pgprot_val(prot))
>> +    __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | \
>> +        ((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
>> +        (PTE_TYPE_PAGE & ~PTE_VALID)))
>>   #define pte_none(pte)        (!pte_val(pte))
>>   #define pte_page(pte)        (pfn_to_page(pte_pfn(pte)))
>> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/ 
>> pgtable.h
>> index 1662c5a8f445..a4dbd81d42bf 100644
>> --- a/arch/x86/include/asm/pgtable.h
>> +++ b/arch/x86/include/asm/pgtable.h
>> @@ -738,6 +738,10 @@ static inline pgprotval_t check_pgprot(pgprot_t 
>> pgprot)
>>   static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
>>   {
>>       phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
>> +
>> +    /* Filter out _PAGE_PSE to ensure PTEs never carry the huge page 
>> bit */
>> +    pgprot = __pgprot(pgprot_val(pgprot) & ~_PAGE_PSE);
> 
> Is it really a good idea to silently drop the bit?
> 
> Today it can either be used for a large page (which should be a pmd,
> of course), or - much worse - you'd strip the _PAGE_PAT bit, which is
> at the same position in PTEs.
> 
> So basically you are removing the ability to use some cache modes.
> 
> NACK!
> 
> 
> Juergen

Hi Willy and Jürgen,

Following up on the x86 _PAGE_PSE and _PAGE_PAT aliasing issue.

To achieve the goal of keeping pfn_pte() pure and completely eradicating 
the pte_clrhuge() anti-pattern, we need a way to ensure pfn_pte() never 
receives a pgprot with the huge bit set.

@Jürgen:
Just to be absolutely certain: is there any safe way to filter out the 
huge page attributes directly inside x86's pfn_pte() without breaking 
PAT? Or does the hardware bit-aliasing make this strictly impossible at 
the pfn_pte() level?

@Willy @Jürgen:
Assuming it is impossible to filter this safely inside pfn_pte() on x86, 
we must translate the pgprot before passing it down. To maintain strict 
type-safety and still drop pte_clrhuge(), I plan to introduce two 
arch-neutral wrappers:

x86:
/* Translates large prot to 4K. Shifts PAT back to bit 7, inherently 
clearing _PAGE_PSE */
#define pgprot_huge_to_pte(prot)    pgprot_large_2_4k(prot)
/* Translates 4K prot to large. Shifts PAT to bit 12, strictly sets 
_PAGE_PSE */
#define pgprot_pte_to_huge(prot) 
__pgprot(pgprot_val(pgprot_4k_2_large(prot)) | _PAGE_PSE)

arm64:
/*
  * Drops Block marker, enforces Page marker.
  * Strictly preserves the PTE_VALID bit to avoid validating PROT_NONE 
pages.
  */
#define pgprot_huge_to_pte(prot) \
      __pgprot((pgprot_val(prot) & ~(PMD_TYPE_MASK & ~PTE_VALID)) | \
             (PTE_TYPE_PAGE & ~PTE_VALID))
/*
  * Drops Page marker, sets Block marker.
  * Strictly preserves the PTE_VALID bit.
  */
#define pgprot_pte_to_huge(prot) \
      __pgprot((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
             (PMD_TYPE_SECT & ~PTE_VALID))

Usage:
1.  Creating a huge pfnmap (remap_try_huge_pmd)
pgprot_t huge_prot = pgprot_pte_to_huge(prot);

/* No need for pmd_mkhuge() */
pmd_t entry = pmd_mkspecial(pfn_pmd(pfn, huge_prot));
set_pmd_at(mm, addr, pmd, entry);

2. Splitting a huge pfnmap (__split_huge_pmd_locked)
pgprot_t small_prot = pgprot_huge_to_pte(pmd_pgprot(old_pmd));

/* No need for pte_clrhuge() */
pte_t entry = pfn_pte(pmd_pfn(old_pmd), small_prot);
set_ptes(mm, haddr, pte, entry, HPAGE_PMD_NR);


Willy, is there a better architectural approach to handle this and 
satisfy the type-safety requirement given the x86 hardware constraints?

-- 
Thanks,
Yin Tirui

Re: [PATCH RFC v3 2/4] mm/pgtable: Make pfn_pte() filter out huge page attributes
Posted by Matthew Wilcox 3 weeks, 6 days ago
On Thu, Mar 05, 2026 at 05:38:46PM +0800, Yin Tirui wrote:
> On 3/4/2026 3:52 PM, Jürgen Groß wrote:
> > Today it can either be used for a large page (which should be a pmd,
> > of course), or - much worse - you'd strip the _PAGE_PAT bit, which is
> > at the same position in PTEs.
> > 
> > So basically you are removing the ability to use some cache modes.
> > 
> > NACK!
> > 
> > 
> > Juergen
> 
> Hi Willy and Jürgen,
> 
> Following up on the x86 _PAGE_PSE and _PAGE_PAT aliasing issue.
> 
> To achieve the goal of keeping pfn_pte() pure and completely eradicating the
> pte_clrhuge() anti-pattern, we need a way to ensure pfn_pte() never receives
> a pgprot with the huge bit set.
> 
> @Jürgen:
> Just to be absolutely certain: is there any safe way to filter out the huge
> page attributes directly inside x86's pfn_pte() without breaking PAT? Or
> does the hardware bit-aliasing make this strictly impossible at the
> pfn_pte() level?
> 
> @Willy @Jürgen:
> Assuming it is impossible to filter this safely inside pfn_pte() on x86, we
> must translate the pgprot before passing it down. To maintain strict
> type-safety and still drop pte_clrhuge(), I plan to introduce two
> arch-neutral wrappers:
> 
> x86:
> /* Translates large prot to 4K. Shifts PAT back to bit 7, inherently
> clearing _PAGE_PSE */
> #define pgprot_huge_to_pte(prot)    pgprot_large_2_4k(prot)
> /* Translates 4K prot to large. Shifts PAT to bit 12, strictly sets
> _PAGE_PSE */
> #define pgprot_pte_to_huge(prot)
> __pgprot(pgprot_val(pgprot_4k_2_large(prot)) | _PAGE_PSE)

I don't think we should have pgprot_large_2_4k().  Or rather, I think it
should be embedded in pmd_pgprot() / pud_pgprot().  That is, we should
have an 'ideal' pgprot which, on x86, perhaps matches that used by the
4k level.  pfn_pmd() should be converting from the ideal pgprot to
that actually used by PMDs (and setting _PAGE_PSE?)

> arm64:
> /*
>  * Drops Block marker, enforces Page marker.
>  * Strictly preserves the PTE_VALID bit to avoid validating PROT_NONE pages.
>  */
> #define pgprot_huge_to_pte(prot) \
>       __pgprot((pgprot_val(prot) & ~(PMD_TYPE_MASK & ~PTE_VALID)) | \
>              (PTE_TYPE_PAGE & ~PTE_VALID))
> /*
>  * Drops Page marker, sets Block marker.
>  * Strictly preserves the PTE_VALID bit.
>  */
> #define pgprot_pte_to_huge(prot) \
>       __pgprot((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
>              (PMD_TYPE_SECT & ~PTE_VALID))
> 
> Usage:
> 1.  Creating a huge pfnmap (remap_try_huge_pmd)
> pgprot_t huge_prot = pgprot_pte_to_huge(prot);
> 
> /* No need for pmd_mkhuge() */
> pmd_t entry = pmd_mkspecial(pfn_pmd(pfn, huge_prot));
> set_pmd_at(mm, addr, pmd, entry);
> 
> 2. Splitting a huge pfnmap (__split_huge_pmd_locked)
> pgprot_t small_prot = pgprot_huge_to_pte(pmd_pgprot(old_pmd));
> 
> /* No need for pte_clrhuge() */
> pte_t entry = pfn_pte(pmd_pfn(old_pmd), small_prot);
> set_ptes(mm, haddr, pte, entry, HPAGE_PMD_NR);
> 
> 
> Willy, is there a better architectural approach to handle this and satisfy
> the type-safety requirement given the x86 hardware constraints?
> 
> -- 
> Thanks,
> Yin Tirui
> 
> 
Re: [PATCH RFC v3 2/4] mm/pgtable: Make pfn_pte() filter out huge page attributes
Posted by Yin Tirui 3 weeks, 2 days ago

On 3/6/2026 12:25 PM, Matthew Wilcox wrote:
> 
> I don't think we should have pgprot_large_2_4k().  Or rather, I think it
> should be embedded in pmd_pgprot() / pud_pgprot().  That is, we should
> have an 'ideal' pgprot which, on x86, perhaps matches that used by the
> 4k level.  pfn_pmd() should be converting from the ideal pgprot to
> that actually used by PMDs (and setting _PAGE_PSE?)
> 

Hi Willy,

I will take this route and implement the embedded approach for the v4 
respin.

-- 
Yin Tirui
Re: [PATCH RFC v3 2/4] mm/pgtable: Make pfn_pte() filter out huge page attributes
Posted by Jürgen Groß 4 weeks ago
On 05.03.26 10:38, Yin Tirui wrote:
> 
> On 3/4/2026 3:52 PM, Jürgen Groß wrote:
>> On 28.02.26 08:09, Yin Tirui wrote:
>>> A fundamental principle of page table type safety is that `pte_t` represents
>>> the lowest level page table entry and should never carry huge page attributes.
>>>
>>> Currently, passing a pgprot with huge page bits (e.g., extracted via
>>> pmd_pgprot()) into pfn_pte() creates a malformed PTE that retains the huge
>>> attribute, leading to the necessity of the ugly `pte_clrhuge()` anti- pattern.
>>>
>>> Enforce type safety by making `pfn_pte()` inherently filter out huge page
>>> attributes:
>>> - On x86: Strip the `_PAGE_PSE` bit.
>>> - On ARM64: Mask out the block descriptor bits in `PTE_TYPE_MASK` and
>>>    enforce the `PTE_TYPE_PAGE` format.
>>> - On RISC-V: No changes required, as RISC-V leaf PMDs and PTEs share the
>>>    exact same hardware format and do not use a distinct huge bit.
>>>
>>> Signed-off-by: Yin Tirui <yintirui@huawei.com>
>>> ---
>>>   arch/arm64/include/asm/pgtable.h | 4 +++-
>>>   arch/x86/include/asm/pgtable.h   | 4 ++++
>>>   2 files changed, 7 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/ asm/ 
>>> pgtable.h
>>> index b3e58735c49b..f2a7a40106d2 100644
>>> --- a/arch/arm64/include/asm/pgtable.h
>>> +++ b/arch/arm64/include/asm/pgtable.h
>>> @@ -141,7 +141,9 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
>>>   #define pte_pfn(pte)        (__pte_to_phys(pte) >> PAGE_SHIFT)
>>>   #define pfn_pte(pfn,prot)    \
>>> -    __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | 
>>> pgprot_val(prot))
>>> +    __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | \
>>> +        ((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
>>> +        (PTE_TYPE_PAGE & ~PTE_VALID)))
>>>   #define pte_none(pte)        (!pte_val(pte))
>>>   #define pte_page(pte)        (pfn_to_page(pte_pfn(pte)))
>>> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/ pgtable.h
>>> index 1662c5a8f445..a4dbd81d42bf 100644
>>> --- a/arch/x86/include/asm/pgtable.h
>>> +++ b/arch/x86/include/asm/pgtable.h
>>> @@ -738,6 +738,10 @@ static inline pgprotval_t check_pgprot(pgprot_t pgprot)
>>>   static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
>>>   {
>>>       phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
>>> +
>>> +    /* Filter out _PAGE_PSE to ensure PTEs never carry the huge page bit */
>>> +    pgprot = __pgprot(pgprot_val(pgprot) & ~_PAGE_PSE);
>>
>> Is it really a good idea to silently drop the bit?
>>
>> Today it can either be used for a large page (which should be a pmd,
>> of course), or - much worse - you'd strip the _PAGE_PAT bit, which is
>> at the same position in PTEs.
>>
>> So basically you are removing the ability to use some cache modes.
>>
>> NACK!
>>
>>
>> Juergen
> 
> Hi Willy and Jürgen,
> 
> Following up on the x86 _PAGE_PSE and _PAGE_PAT aliasing issue.
> 
> To achieve the goal of keeping pfn_pte() pure and completely eradicating the 
> pte_clrhuge() anti-pattern, we need a way to ensure pfn_pte() never receives a 
> pgprot with the huge bit set.
> 
> @Jürgen:
> Just to be absolutely certain: is there any safe way to filter out the huge page 
> attributes directly inside x86's pfn_pte() without breaking PAT? Or does the 
> hardware bit-aliasing make this strictly impossible at the pfn_pte() level?

There is no huge bit at the PTE level. It exists only at the PMD and
PUD levels.

So: yes, it is absolutely impossible to filter it out, as the bit has a
different meaning in "real" PTEs (with "PTE" having the meaning: a translation
entry in a page referenced by a PMD entry not having the PSE bit set).

> 
> @Willy @Jürgen:
> Assuming it is impossible to filter this safely inside pfn_pte() on x86, we must 
> translate the pgprot before passing it down. To maintain strict type-safety and 
> still drop pte_clrhuge(), I plan to introduce two arch-neutral wrappers:
> 
> x86:
> /* Translates large prot to 4K. Shifts PAT back to bit 7, inherently clearing 
> _PAGE_PSE */
> #define pgprot_huge_to_pte(prot)    pgprot_large_2_4k(prot)
> /* Translates 4K prot to large. Shifts PAT to bit 12, strictly sets _PAGE_PSE */
> #define pgprot_pte_to_huge(prot) __pgprot(pgprot_val(pgprot_4k_2_large(prot)) | 
> _PAGE_PSE)

Seems to be okay.


Juergen
Re: [PATCH RFC v3 2/4] mm/pgtable: Make pfn_pte() filter out huge page attributes
Posted by Yin Tirui 3 weeks, 2 days ago

On 3/5/2026 6:05 PM, Jürgen Groß wrote:
>> Hi Willy and Jürgen,
>>
>> Following up on the x86 _PAGE_PSE and _PAGE_PAT aliasing issue.
>>
>> To achieve the goal of keeping pfn_pte() pure and completely 
>> eradicating the pte_clrhuge() anti-pattern, we need a way to ensure 
>> pfn_pte() never receives a pgprot with the huge bit set.
>>
>> @Jürgen:
>> Just to be absolutely certain: is there any safe way to filter out the 
>> huge page attributes directly inside x86's pfn_pte() without breaking 
>> PAT? Or does the hardware bit-aliasing make this strictly impossible 
>> at the pfn_pte() level?
> 
> There is no huge bit at the PTE level. It exists only at the PMD and
> PUD levels.
> 
> So: yes, it is absolutely impossible to filter it out, as the bit has a
> different meaning in "real" PTEs (with "PTE" having the meaning: a 
> translation
> entry in a page referenced by a PMD entry not having the PSE bit set).
> 
Hi Jürgen,

Thank you for your confirmation.

>>
>> @Willy @Jürgen:
>> Assuming it is impossible to filter this safely inside pfn_pte() on 
>> x86, we must translate the pgprot before passing it down. To maintain 
>> strict type-safety and still drop pte_clrhuge(), I plan to introduce 
>> two arch-neutral wrappers:
>>
>> x86:
>> /* Translates large prot to 4K. Shifts PAT back to bit 7, inherently 
>> clearing _PAGE_PSE */
>> #define pgprot_huge_to_pte(prot)    pgprot_large_2_4k(prot)
>> /* Translates 4K prot to large. Shifts PAT to bit 12, strictly sets 
>> _PAGE_PSE */
>> #define pgprot_pte_to_huge(prot) 
>> __pgprot(pgprot_val(pgprot_4k_2_large(prot)) | _PAGE_PSE)
> 
> Seems to be okay.

While the wrapper approach handles the aliasing, Willy recently 
suggested taking it a step further by embedding this translation 
directly into `pfn_pmd()` and `pmd_pgprot()`.

I am going to explore this embedded approach for the v4 respin.

-- 
Yin Tirui

Re: [PATCH RFC v3 2/4] mm/pgtable: Make pfn_pte() filter out huge page attributes
Posted by Yin Tirui 4 weeks, 1 day ago

On 3/4/2026 3:52 PM, Jürgen Groß wrote:
> On 28.02.26 08:09, Yin Tirui wrote:
>> A fundamental principle of page table type safety is that `pte_t` 
>> represents
>> the lowest level page table entry and should never carry huge page 
>> attributes.
>>
>> Currently, passing a pgprot with huge page bits (e.g., extracted via
>> pmd_pgprot()) into pfn_pte() creates a malformed PTE that retains the 
>> huge
>> attribute, leading to the necessity of the ugly `pte_clrhuge()` anti- 
>> pattern.
>>
>> Enforce type safety by making `pfn_pte()` inherently filter out huge page
>> attributes:
>> - On x86: Strip the `_PAGE_PSE` bit.
>> - On ARM64: Mask out the block descriptor bits in `PTE_TYPE_MASK` and
>>    enforce the `PTE_TYPE_PAGE` format.
>> - On RISC-V: No changes required, as RISC-V leaf PMDs and PTEs share the
>>    exact same hardware format and do not use a distinct huge bit.
>>
>> Signed-off-by: Yin Tirui <yintirui@huawei.com>
>> ---
>>   arch/arm64/include/asm/pgtable.h | 4 +++-
>>   arch/x86/include/asm/pgtable.h   | 4 ++++
>>   2 files changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/ 
>> asm/pgtable.h
>> index b3e58735c49b..f2a7a40106d2 100644
>> --- a/arch/arm64/include/asm/pgtable.h
>> +++ b/arch/arm64/include/asm/pgtable.h
>> @@ -141,7 +141,9 @@ static inline pteval_t 
>> __phys_to_pte_val(phys_addr_t phys)
>>   #define pte_pfn(pte)        (__pte_to_phys(pte) >> PAGE_SHIFT)
>>   #define pfn_pte(pfn,prot)    \
>> -    __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | 
>> pgprot_val(prot))
>> +    __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | \
>> +        ((pgprot_val(prot) & ~(PTE_TYPE_MASK & ~PTE_VALID)) | \
>> +        (PTE_TYPE_PAGE & ~PTE_VALID)))
>>   #define pte_none(pte)        (!pte_val(pte))
>>   #define pte_page(pte)        (pfn_to_page(pte_pfn(pte)))
>> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/ 
>> pgtable.h
>> index 1662c5a8f445..a4dbd81d42bf 100644
>> --- a/arch/x86/include/asm/pgtable.h
>> +++ b/arch/x86/include/asm/pgtable.h
>> @@ -738,6 +738,10 @@ static inline pgprotval_t check_pgprot(pgprot_t 
>> pgprot)
>>   static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
>>   {
>>       phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT;
>> +
>> +    /* Filter out _PAGE_PSE to ensure PTEs never carry the huge page 
>> bit */
>> +    pgprot = __pgprot(pgprot_val(pgprot) & ~_PAGE_PSE);
> 
> Is it really a good idea to silently drop the bit?
> 
> Today it can either be used for a large page (which should be a pmd,
> of course), or - much worse - you'd strip the _PAGE_PAT bit, which is
> at the same position in PTEs.
> 
> So basically you are removing the ability to use some cache modes.
> 
> NACK!
> 
> 
> Juergen

Hi Jürgen,

You are absolutely right. I missed the fact that `_PAGE_PSE` aliases 
with `_PAGE_PAT` on 4K PTEs.

The intention here was to follow previous feedback to enforce type 
safety by filtering out huge page attributes directly inside 
`pfn_pte()`. However, doing it this way obviously breaks the cache modes 
on x86.

I agree with the NACK. I will drop this approach and rethink how to 
handle the huge-to-normal pgprot conversion safely for v4.

-- 
Thanks,
Yin Tirui