Use new try_vma_locked_page_fault() helper to simplify code.
No functional change intended.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/powerpc/mm/fault.c | 66 ++++++++++++++++++++---------------------
1 file changed, 32 insertions(+), 34 deletions(-)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b1723094d464..52f9546e020e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -391,6 +391,22 @@ static int page_fault_is_bad(unsigned long err)
#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
#endif
+#ifdef CONFIG_PER_VMA_LOCK
+bool arch_vma_access_error(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ int is_exec = TRAP(vmf->regs) == INTERRUPT_INST_STORAGE;
+ int is_write = page_fault_is_write(vmf->fault_code);
+
+ if (unlikely(access_pkey_error(is_write, is_exec,
+ (vmf->fault_code & DSISR_KEYFAULT), vma)))
+ return true;
+
+ if (unlikely(access_error(is_write, is_exec, vma)))
+ return true;
+ return false;
+}
+#endif
+
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
* for a data fault, SRR1 for an instruction fault.
@@ -407,12 +423,18 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
{
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
- unsigned int flags = FAULT_FLAG_DEFAULT;
int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
int is_user = user_mode(regs);
int is_write = page_fault_is_write(error_code);
vm_fault_t fault, major = 0;
bool kprobe_fault = kprobe_page_fault(regs, 11);
+ struct vm_fault vmf = {
+ .real_address = address,
+ .fault_code = error_code,
+ .regs = regs,
+ .flags = FAULT_FLAG_DEFAULT,
+ };
+
if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
return 0;
@@ -463,45 +485,21 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
* mmap_lock held
*/
if (is_user)
- flags |= FAULT_FLAG_USER;
+ vmf.flags |= FAULT_FLAG_USER;
if (is_write)
- flags |= FAULT_FLAG_WRITE;
+ vmf.flags |= FAULT_FLAG_WRITE;
if (is_exec)
- flags |= FAULT_FLAG_INSTRUCTION;
+ vmf.flags |= FAULT_FLAG_INSTRUCTION;
- if (!(flags & FAULT_FLAG_USER))
- goto lock_mmap;
-
- vma = lock_vma_under_rcu(mm, address);
- if (!vma)
- goto lock_mmap;
-
- if (unlikely(access_pkey_error(is_write, is_exec,
- (error_code & DSISR_KEYFAULT), vma))) {
- vma_end_read(vma);
- goto lock_mmap;
- }
-
- if (unlikely(access_error(is_write, is_exec, vma))) {
- vma_end_read(vma);
- goto lock_mmap;
- }
-
- fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
- if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
- vma_end_read(vma);
-
- if (!(fault & VM_FAULT_RETRY)) {
- count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ fault = try_vma_locked_page_fault(&vmf);
+ if (fault == VM_FAULT_NONE)
+ goto retry;
+ if (!(fault & VM_FAULT_RETRY))
goto done;
- }
- count_vm_vma_lock_event(VMA_LOCK_RETRY);
if (fault_signal_pending(fault, regs))
return user_mode(regs) ? 0 : SIGBUS;
-lock_mmap:
-
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -528,7 +526,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags, regs);
+ fault = handle_mm_fault(vma, address, vmf.flags, regs);
major |= fault & VM_FAULT_MAJOR;
@@ -544,7 +542,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
* case.
*/
if (unlikely(fault & VM_FAULT_RETRY)) {
- flags |= FAULT_FLAG_TRIED;
+ vmf.flags |= FAULT_FLAG_TRIED;
goto retry;
}
--
2.27.0
Le 21/08/2023 à 14:30, Kefeng Wang a écrit :
> Use new try_vma_locked_page_fault() helper to simplify code.
> No functional change intended.
Does it really simplify the code? It's 32 insertions versus 34 deletions,
so it only removes 2 lines.
I don't like the struct vm_fault you are adding because when it was four
independent variables it was handled through local registers. Now that
it is a struct it has to go via the stack, leading to unnecessary memory
reads and writes. And going back and forth between architecture code and
generic code may also hurt performance.
Did you do any performance analysis? Page faults are really a hot
path when dealing with minor faults.
Thanks
Christophe
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
> arch/powerpc/mm/fault.c | 66 ++++++++++++++++++++---------------------
> 1 file changed, 32 insertions(+), 34 deletions(-)
>
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index b1723094d464..52f9546e020e 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -391,6 +391,22 @@ static int page_fault_is_bad(unsigned long err)
> #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
> #endif
>
> +#ifdef CONFIG_PER_VMA_LOCK
> +bool arch_vma_access_error(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> + int is_exec = TRAP(vmf->regs) == INTERRUPT_INST_STORAGE;
> + int is_write = page_fault_is_write(vmf->fault_code);
> +
> + if (unlikely(access_pkey_error(is_write, is_exec,
> + (vmf->fault_code & DSISR_KEYFAULT), vma)))
> + return true;
> +
> + if (unlikely(access_error(is_write, is_exec, vma)))
> + return true;
> + return false;
> +}
> +#endif
> +
> /*
> * For 600- and 800-family processors, the error_code parameter is DSISR
> * for a data fault, SRR1 for an instruction fault.
> @@ -407,12 +423,18 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
> {
> struct vm_area_struct * vma;
> struct mm_struct *mm = current->mm;
> - unsigned int flags = FAULT_FLAG_DEFAULT;
> int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
> int is_user = user_mode(regs);
> int is_write = page_fault_is_write(error_code);
> vm_fault_t fault, major = 0;
> bool kprobe_fault = kprobe_page_fault(regs, 11);
> + struct vm_fault vmf = {
> + .real_address = address,
> + .fault_code = error_code,
> + .regs = regs,
> + .flags = FAULT_FLAG_DEFAULT,
> + };
> +
>
> if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
> return 0;
> @@ -463,45 +485,21 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
> * mmap_lock held
> */
> if (is_user)
> - flags |= FAULT_FLAG_USER;
> + vmf.flags |= FAULT_FLAG_USER;
> if (is_write)
> - flags |= FAULT_FLAG_WRITE;
> + vmf.flags |= FAULT_FLAG_WRITE;
> if (is_exec)
> - flags |= FAULT_FLAG_INSTRUCTION;
> + vmf.flags |= FAULT_FLAG_INSTRUCTION;
>
> - if (!(flags & FAULT_FLAG_USER))
> - goto lock_mmap;
> -
> - vma = lock_vma_under_rcu(mm, address);
> - if (!vma)
> - goto lock_mmap;
> -
> - if (unlikely(access_pkey_error(is_write, is_exec,
> - (error_code & DSISR_KEYFAULT), vma))) {
> - vma_end_read(vma);
> - goto lock_mmap;
> - }
> -
> - if (unlikely(access_error(is_write, is_exec, vma))) {
> - vma_end_read(vma);
> - goto lock_mmap;
> - }
> -
> - fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
> - if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> - vma_end_read(vma);
> -
> - if (!(fault & VM_FAULT_RETRY)) {
> - count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> + fault = try_vma_locked_page_fault(&vmf);
> + if (fault == VM_FAULT_NONE)
> + goto retry;
> + if (!(fault & VM_FAULT_RETRY))
> goto done;
> - }
> - count_vm_vma_lock_event(VMA_LOCK_RETRY);
>
> if (fault_signal_pending(fault, regs))
> return user_mode(regs) ? 0 : SIGBUS;
>
> -lock_mmap:
> -
> /* When running in the kernel we expect faults to occur only to
> * addresses in user space. All other faults represent errors in the
> * kernel and should generate an OOPS. Unfortunately, in the case of an
> @@ -528,7 +526,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
> * make sure we exit gracefully rather than endlessly redo
> * the fault.
> */
> - fault = handle_mm_fault(vma, address, flags, regs);
> + fault = handle_mm_fault(vma, address, vmf.flags, regs);
>
> major |= fault & VM_FAULT_MAJOR;
>
> @@ -544,7 +542,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
> * case.
> */
> if (unlikely(fault & VM_FAULT_RETRY)) {
> - flags |= FAULT_FLAG_TRIED;
> + vmf.flags |= FAULT_FLAG_TRIED;
> goto retry;
> }
>
On 2023/8/22 17:38, Christophe Leroy wrote:
>
>
> Le 21/08/2023 à 14:30, Kefeng Wang a écrit :
>> Use new try_vma_locked_page_fault() helper to simplify code.
>> No functional change intended.
>
> Does it really simplify the code? It's 32 insertions versus 34 deletions,
> so it only removes 2 lines.
Yes, it is unfriendly for powerpc, as the arch's vma access check is much
more complex than on other architectures.
>
> I don't like the struct vm_fault you are adding because when it was four
> independent variables it was handled through local registers. Now that
> it is a struct it has to go via the stack, leading to unnecessary memory
> reads and writes. And going back and forth between architecture code and
> generic code may also hurt performance.
Because different architectures use different variables to check vma access,
the easy way is to add them into vmf; I didn't find a better way.
>
> Did you do any performance analysis? Page faults are really a hot
> path when dealing with minor faults.
No, this has only been build-tested; it is an RFC to get feedback on the conversion.
Thanks.
>
> Thanks
> Christophe
>
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>> arch/powerpc/mm/fault.c | 66 ++++++++++++++++++++---------------------
>> 1 file changed, 32 insertions(+), 34 deletions(-)
>>
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index b1723094d464..52f9546e020e 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -391,6 +391,22 @@ static int page_fault_is_bad(unsigned long err)
>> #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
>> #endif
>>
>> +#ifdef CONFIG_PER_VMA_LOCK
>> +bool arch_vma_access_error(struct vm_area_struct *vma, struct vm_fault *vmf)
>> +{
>> + int is_exec = TRAP(vmf->regs) == INTERRUPT_INST_STORAGE;
>> + int is_write = page_fault_is_write(vmf->fault_code);
>> +
>> + if (unlikely(access_pkey_error(is_write, is_exec,
>> + (vmf->fault_code & DSISR_KEYFAULT), vma)))
>> + return true;
>> +
>> + if (unlikely(access_error(is_write, is_exec, vma)))
>> + return true;
>> + return false;
>> +}
>> +#endif
>> +
>> /*
>> * For 600- and 800-family processors, the error_code parameter is DSISR
>> * for a data fault, SRR1 for an instruction fault.
>> @@ -407,12 +423,18 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>> {
>> struct vm_area_struct * vma;
>> struct mm_struct *mm = current->mm;
>> - unsigned int flags = FAULT_FLAG_DEFAULT;
>> int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
>> int is_user = user_mode(regs);
>> int is_write = page_fault_is_write(error_code);
>> vm_fault_t fault, major = 0;
>> bool kprobe_fault = kprobe_page_fault(regs, 11);
>> + struct vm_fault vmf = {
>> + .real_address = address,
>> + .fault_code = error_code,
>> + .regs = regs,
>> + .flags = FAULT_FLAG_DEFAULT,
>> + };
>> +
>>
>> if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
>> return 0;
>> @@ -463,45 +485,21 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>> * mmap_lock held
>> */
>> if (is_user)
>> - flags |= FAULT_FLAG_USER;
>> + vmf.flags |= FAULT_FLAG_USER;
>> if (is_write)
>> - flags |= FAULT_FLAG_WRITE;
>> + vmf.flags |= FAULT_FLAG_WRITE;
>> if (is_exec)
>> - flags |= FAULT_FLAG_INSTRUCTION;
>> + vmf.flags |= FAULT_FLAG_INSTRUCTION;
>>
>> - if (!(flags & FAULT_FLAG_USER))
>> - goto lock_mmap;
>> -
>> - vma = lock_vma_under_rcu(mm, address);
>> - if (!vma)
>> - goto lock_mmap;
>> -
>> - if (unlikely(access_pkey_error(is_write, is_exec,
>> - (error_code & DSISR_KEYFAULT), vma))) {
>> - vma_end_read(vma);
>> - goto lock_mmap;
>> - }
>> -
>> - if (unlikely(access_error(is_write, is_exec, vma))) {
>> - vma_end_read(vma);
>> - goto lock_mmap;
>> - }
>> -
>> - fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
>> - if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>> - vma_end_read(vma);
>> -
>> - if (!(fault & VM_FAULT_RETRY)) {
>> - count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>> + fault = try_vma_locked_page_fault(&vmf);
>> + if (fault == VM_FAULT_NONE)
>> + goto retry;
>> + if (!(fault & VM_FAULT_RETRY))
>> goto done;
>> - }
>> - count_vm_vma_lock_event(VMA_LOCK_RETRY);
>>
>> if (fault_signal_pending(fault, regs))
>> return user_mode(regs) ? 0 : SIGBUS;
>>
>> -lock_mmap:
>> -
>> /* When running in the kernel we expect faults to occur only to
>> * addresses in user space. All other faults represent errors in the
>> * kernel and should generate an OOPS. Unfortunately, in the case of an
>> @@ -528,7 +526,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>> * make sure we exit gracefully rather than endlessly redo
>> * the fault.
>> */
>> - fault = handle_mm_fault(vma, address, flags, regs);
>> + fault = handle_mm_fault(vma, address, vmf.flags, regs);
>>
>> major |= fault & VM_FAULT_MAJOR;
>>
>> @@ -544,7 +542,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>> * case.
>> */
>> if (unlikely(fault & VM_FAULT_RETRY)) {
>> - flags |= FAULT_FLAG_TRIED;
>> + vmf.flags |= FAULT_FLAG_TRIED;
>> goto retry;
>> }
>>
© 2016 - 2025 Red Hat, Inc.