[PATCH v2 13/20] x86/mm: enable page table sharing
Posted by Anthony Yznaga 10 months, 1 week ago
Enable x86 support for handling page faults in an mshare region by
redirecting page faults to operate on the mshare mm_struct and vmas
contained in it.
Some permission checks are done using vma flags in architecture-specific
fault handling code, so the actual vma needed to complete the handling
is acquired before calling handle_mm_fault(). Because of this, an
ARCH_SUPPORTS_MSHARE config option is added.
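
For context, the fault path below relies on the find_shared_vma() helper
added earlier in this series to switch from the faulting task's vma to
the real vma in the mshare host mm. A minimal sketch of the contract
implied by its use in do_user_addr_fault() below (the vm_private_data
lookup is a placeholder, not the series' actual code):

	/*
	 * Sketch only: redirect an mshare fault to the host mm. On success
	 * the host mm's mmap lock is held and *vmap points to the host vma;
	 * the caller is responsible for unlocking it. The real helper may
	 * also translate *addrp into the host address space.
	 */
	static vm_fault_t find_shared_vma(struct vm_area_struct **vmap,
					  unsigned long *addrp)
	{
		struct vm_area_struct *guest = *vmap;
		struct mm_struct *host_mm = guest->vm_private_data; /* placeholder */
		struct vm_area_struct *vma;

		mmap_read_lock(host_mm);
		vma = find_vma(host_mm, *addrp);
		if (vma && vma->vm_start > *addrp)
			vma = NULL;
		if (!vma)
			mmap_read_unlock(host_mm);
		*vmap = vma;
		return 0;
	}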

Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 arch/Kconfig        |  3 +++
 arch/x86/Kconfig    |  1 +
 arch/x86/mm/fault.c | 37 ++++++++++++++++++++++++++++++++++++-
 mm/Kconfig          |  2 +-
 4 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 9f6eb09ef12d..2e000fefe9b3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1652,6 +1652,9 @@ config HAVE_ARCH_PFN_VALID
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	bool
 
+config ARCH_SUPPORTS_MSHARE
+	bool
+
 config ARCH_SUPPORTS_PAGE_TABLE_CHECK
 	bool
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1502fd0c3c06..1f1779decb44 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -125,6 +125,7 @@ config X86
 	select ARCH_SUPPORTS_ACPI
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	select ARCH_SUPPORTS_MSHARE		if X86_64
 	select ARCH_SUPPORTS_PAGE_TABLE_CHECK	if X86_64
 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
 	select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP	if NR_CPUS <= 4096
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 296d294142c8..49659d2f9316 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1216,6 +1216,8 @@ void do_user_addr_fault(struct pt_regs *regs,
 	struct mm_struct *mm;
 	vm_fault_t fault;
 	unsigned int flags = FAULT_FLAG_DEFAULT;
+	bool is_shared_vma;
+	unsigned long addr;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -1329,6 +1331,12 @@ void do_user_addr_fault(struct pt_regs *regs,
 	if (!vma)
 		goto lock_mmap;
 
+	/* mshare does not support per-VMA locks yet */
+	if (vma_is_mshare(vma)) {
+		vma_end_read(vma);
+		goto lock_mmap;
+	}
+
 	if (unlikely(access_error(error_code, vma))) {
 		bad_area_access_error(regs, error_code, address, NULL, vma);
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
@@ -1357,17 +1365,38 @@ void do_user_addr_fault(struct pt_regs *regs,
 lock_mmap:
 
 retry:
+	addr = address;
+	is_shared_vma = false;
 	vma = lock_mm_and_find_vma(mm, address, regs);
 	if (unlikely(!vma)) {
 		bad_area_nosemaphore(regs, error_code, address);
 		return;
 	}
 
+	if (unlikely(vma_is_mshare(vma))) {
+		fault = find_shared_vma(&vma, &addr);
+
+		if (fault) {
+			mmap_read_unlock(mm);
+			goto done;
+		}
+
+		if (!vma) {
+			mmap_read_unlock(mm);
+			bad_area_nosemaphore(regs, error_code, address);
+			return;
+		}
+
+		is_shared_vma = true;
+	}
+
 	/*
 	 * Ok, we have a good vm_area for this memory access, so
 	 * we can handle it..
 	 */
 	if (unlikely(access_error(error_code, vma))) {
+		if (unlikely(is_shared_vma))
+			mmap_read_unlock(vma->vm_mm);
 		bad_area_access_error(regs, error_code, address, mm, vma);
 		return;
 	}
@@ -1385,7 +1414,11 @@ void do_user_addr_fault(struct pt_regs *regs,
 	 * userland). The return to userland is identified whenever
 	 * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags.
 	 */
-	fault = handle_mm_fault(vma, address, flags, regs);
+	fault = handle_mm_fault(vma, addr, flags, regs);
+
+	if (unlikely(is_shared_vma) && ((fault & VM_FAULT_COMPLETED) ||
+	    (fault & VM_FAULT_RETRY) || fault_signal_pending(fault, regs)))
+		mmap_read_unlock(mm);
 
 	if (fault_signal_pending(fault, regs)) {
 		/*
@@ -1413,6 +1446,8 @@ void do_user_addr_fault(struct pt_regs *regs,
 		goto retry;
 	}
 
+	if (unlikely(is_shared_vma))
+		mmap_read_unlock(vma->vm_mm);
 	mmap_read_unlock(mm);
 done:
 	if (likely(!(fault & VM_FAULT_ERROR)))
diff --git a/mm/Kconfig b/mm/Kconfig
index e6c90db83d01..8a5a159457f2 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1344,7 +1344,7 @@ config PT_RECLAIM
 
 config MSHARE
 	bool "Mshare"
-	depends on MMU
+	depends on MMU && ARCH_SUPPORTS_MSHARE
 	help
 	  Enable msharefs: A ram-based filesystem that allows multiple
 	  processes to share page table entries for shared pages. A file
-- 
2.43.5
Re: [PATCH v2 13/20] x86/mm: enable page table sharing
Posted by Yongting Lin 6 months ago
Hi,

On 4/4/25 10:18 AM, Anthony Yznaga wrote:
> [...]
> @@ -1385,7 +1414,11 @@ void do_user_addr_fault(struct pt_regs *regs,
>  	 * userland). The return to userland is identified whenever
>  	 * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags.
>  	 */
> -	fault = handle_mm_fault(vma, address, flags, regs);
> +	fault = handle_mm_fault(vma, addr, flags, regs);
> +
> +	if (unlikely(is_shared_vma) && ((fault & VM_FAULT_COMPLETED) ||
> +	    (fault & VM_FAULT_RETRY) || fault_signal_pending(fault, regs)))
> +		mmap_read_unlock(mm);

I was backporting the mshare patches to a 5.15 kernel and running some
basic tests, and found a potential issue.

Reaching this point means find_shared_vma() has executed successfully
and host_mm->mmap_lock has been taken.

When the returned fault value has VM_FAULT_COMPLETED or VM_FAULT_RETRY
set, or fault_signal_pending(fault, regs) is true, there is no chance
to release the locks of both mm and host_mm (i.e. vma->vm_mm) in the
code that follows.

As a result, vma->vm_mm->mmap_lock needs to be released as well.

So it should look like this:

-	fault = handle_mm_fault(vma, address, flags, regs);
+	fault = handle_mm_fault(vma, addr, flags, regs);
+
+	if (unlikely(is_shared_vma) && ((fault & VM_FAULT_COMPLETED) ||
+	    (fault & VM_FAULT_RETRY) || fault_signal_pending(fault, regs))) {
+		mmap_read_unlock(vma->vm_mm);
+		mmap_read_unlock(mm);
+	}

Yongting Lin.
Re: [PATCH v2 13/20] x86/mm: enable page table sharing
Posted by Anthony Yznaga 6 months ago

On 8/12/25 6:46 AM, Yongting Lin wrote:
> Hi,
> 
> On 4/4/25 10:18 AM, Anthony Yznaga wrote:
>> [...]
>> @@ -1385,7 +1414,11 @@ void do_user_addr_fault(struct pt_regs *regs,
>>   	 * userland). The return to userland is identified whenever
>>   	 * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags.
>>   	 */
>> -	fault = handle_mm_fault(vma, address, flags, regs);
>> +	fault = handle_mm_fault(vma, addr, flags, regs);
>> +
>> +	if (unlikely(is_shared_vma) && ((fault & VM_FAULT_COMPLETED) ||
>> +	    (fault & VM_FAULT_RETRY) || fault_signal_pending(fault, regs)))
>> +		mmap_read_unlock(mm);
> 
> I was backporting the mshare patches to a 5.15 kernel and running some
> basic tests, and found a potential issue.
> 
> Reaching this point means find_shared_vma() has executed successfully
> and host_mm->mmap_lock has been taken.
> 
> When the returned fault value has VM_FAULT_COMPLETED or VM_FAULT_RETRY
> set, or fault_signal_pending(fault, regs) is true, there is no chance to
> release the locks of both mm and host_mm (i.e. vma->vm_mm) in the code
> that follows.

If VM_FAULT_COMPLETED or VM_FAULT_RETRY is returned, then the
host_mm->mmap_lock will already have been released. See
fault_dirty_shared_page(), filemap_fault(), and other callers of
maybe_unlock_mmap_for_io(). I will add a comment to help make this
clearer. I also realized that fault_signal_pending() does not need to
be called here because it can only return true if VM_FAULT_RETRY is
set, so I'll change that, too.
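
For reference, the helper Anthony points to looks roughly like this in
recent mainline kernels (mm/internal.h; check your tree for the exact
form). The key point for mshare is that it drops the mmap lock of
vmf->vma->vm_mm, which in this path is the host mm:

	static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
							    struct file *fpin)
	{
		int flags = vmf->flags;

		if (fpin)
			return fpin;

		/*
		 * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page
		 * locks or anything, so we only pin the file and drop the
		 * mmap_lock if only FAULT_FLAG_ALLOW_RETRY is set, while this
		 * is the first attempt.
		 */
		if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
		    FAULT_FLAG_ALLOW_RETRY) {
			fpin = get_file(vmf->vma->vm_file);
			release_fault_lock(vmf);
		}
		return fpin;
	}

That is why only the faulting task's mm still needs to be unlocked after
handle_mm_fault() returns VM_FAULT_COMPLETED or VM_FAULT_RETRY.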

The checks in a 5.15 kernel will be different. You probably want 
something like:

	if (unlikely(is_shared_vma) && (((fault & VM_FAULT_RETRY) &&
	    (flags & FAULT_FLAG_ALLOW_RETRY)) ||
	    fault_signal_pending(fault, regs)))
		mmap_read_unlock(mm);

Anthony

Re: [PATCH v2 13/20] x86/mm: enable page table sharing
Posted by Yongting Lin 5 months, 3 weeks ago
Thank you, Anthony!

Yep, I checked the comments in arch/x86/mm/fault.c, which say the same
as your advice in the previous email.

I changed my code in the 5.15 kernel as below:

	if (unlikely(is_shared_vma) && (((fault & VM_FAULT_RETRY) &&
	    (flags & FAULT_FLAG_ALLOW_RETRY)) || fault_signal_pending(fault, regs)))
		mmap_read_unlock(mm);

BTW: I wrote some selftests in my GitHub repository that exercise the
basic functionality of mshare, and I will write more complex cases to
cover new functionality or defects found in mshare. For example, once
you support mshare as a VMA in KVM (addressing the defect Jann Horn
pointed out), I will add extra test cases to verify correctness for
that scenario.

From Jann Horn's review:
https://lore.kernel.org/all/CAG48ez3cUZf+xOtP6UkkS2-CmOeo+3K5pvny0AFL_XBkHh5q_g@mail.gmail.com/

Currently, the selftests are in my GitHub repository, and you can
retrieve them as below:

    git remote add yongting-mshare-selftests https://github.com/ivanalgo/linux-kernel-develop/
    git fetch yongting-mshare-selftests dev-mshare-v2-selftest-v1
    git cherry-pick a64f2ff6497d13c09badc0fc68c44d9995bc2fef

At this stage, I am not sure of the best way to proceed:
- Should I send them as part of your next version (v3)?
- Or should I post them separately as [RFC PATCH] for early review?

Please let me know your preference; any suggestions are welcome.
I am happy to rebase and resend in whichever format works best for
the community.

Thanks
Yongting

Re: [PATCH v2 13/20] x86/mm: enable page table sharing
Posted by Anthony Yznaga 5 months, 3 weeks ago
Hi Yongting,

On 8/18/25 2:44 AM, Yongting Lin wrote:
> Thank you, Anthony!
> 
> Yep, I checked the comments in arch/x86/mm/fault.c, which say the same
> as your advice in the previous email.
> 
> I changed my code in the 5.15 kernel as below:
> 
> 	if (unlikely(is_shared_vma) && (((fault & VM_FAULT_RETRY) &&
> 	    (flags & FAULT_FLAG_ALLOW_RETRY)) || fault_signal_pending(fault, regs)))
> 		mmap_read_unlock(mm);
> 
> BTW: I wrote some selftests in my GitHub repository that exercise the
> basic functionality of mshare, and I will write more complex cases to
> cover new functionality or defects found in mshare. For example, once
> you support mshare as a VMA in KVM (addressing the defect Jann Horn
> pointed out), I will add extra test cases to verify correctness for
> that scenario.

This is great! I'll take a look at them in more detail. I just sent out 
an updated series.

> 
> From Jann Horn's review:
> https://lore.kernel.org/all/CAG48ez3cUZf+xOtP6UkkS2-CmOeo+3K5pvny0AFL_XBkHh5q_g@mail.gmail.com/

My new series does not yet have support for mmu notifiers. It's 
something I'm working on, but there are key issues to overcome. One is 
that I need to update the implementation of mm_take_all_locks() to also 
carefully take all locks in any mapped mshare regions. The other is that 
passing through mmu notifier calls for arch_invalidate_secondary_tlbs 
callbacks is especially tricky because the callback is not allowed to 
sleep due to holding a ptl spin lock.
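
A purely illustrative sketch of the mm_take_all_locks() direction
described above (mshare_host_mm() is a hypothetical accessor, and the
lock-ordering and unwinding problems Anthony mentions are only hinted
at in the comments, not solved):

	int mm_take_all_locks_mshare_aware(struct mm_struct *mm)
	{
		struct vm_area_struct *vma;
		VMA_ITERATOR(vmi, mm, 0);

		/* caller already holds mmap_write_lock(mm) */
		if (mm_take_all_locks(mm))
			return -EINTR;

		for_each_vma(vmi, vma) {
			if (!vma_is_mshare(vma))
				continue;
			/*
			 * The careful part: a host mm mapped more than once
			 * must only be visited once, and on failure every
			 * lock taken so far must be unwound. Not shown here.
			 */
			mmap_write_lock(mshare_host_mm(vma));	/* hypothetical */
			if (mm_take_all_locks(mshare_host_mm(vma)))
				return -EINTR;	/* sketch only: no unwind */
		}
		return 0;
	}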

> 
> Currently, the selftests are in my GitHub repository, and you can
> retrieve them as below:
> 
>      git remote add yongting-mshare-selftests https://github.com/ivanalgo/linux-kernel-develop/
>      git fetch yongting-mshare-selftests dev-mshare-v2-selftest-v1
>      git cherry-pick a64f2ff6497d13c09badc0fc68c44d9995bc2fef
> 
> At this stage, I am not sure of the best way to proceed:
> - Should I send them as part of your next version (v3)?
> - Or should I post them separately as [RFC PATCH] for early review?
> 
> Please let me know your preference; any suggestions are welcome.
> I am happy to rebase and resend in whichever format works best for
> the community.

I may have more feedback once I take a look. I suggest starting by
updating them to work with the series I just sent out.

Thanks,
Anthony