More and more architectures enable ARCH_SUPPORTS_PER_VMA_LOCK, e.g. x86,
arm64, powerpc, s390 and riscv. Their implementations are very similar,
which results in duplicated code. Add a generic VMA lock-based page fault
handler to eliminate the duplication; this also makes it easy to support
the feature on new architectures.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
include/linux/mm.h | 28 ++++++++++++++++++++++++++++
mm/memory.c | 42 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 70 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c7886784832b..cba1b7b19c9d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -633,6 +633,15 @@ static inline void vma_numab_state_init(struct vm_area_struct *vma) {}
static inline void vma_numab_state_free(struct vm_area_struct *vma) {}
#endif /* CONFIG_NUMA_BALANCING */
+struct vm_locked_fault {
+ struct mm_struct *mm;
+ unsigned long address;
+ unsigned int fault_flags;
+ unsigned long vm_flags;
+ struct pt_regs *regs;
+ unsigned long fault_code;
+};
+
#ifdef CONFIG_PER_VMA_LOCK
/*
* Try to read-lock a vma. The function is allowed to occasionally yield false
@@ -733,6 +742,19 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
unsigned long address);
+#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags, _vm_flags, _regs, _fault_code) \
+ _name.mm = _mm; \
+ _name.address = _address; \
+ _name.fault_flags = _fault_flags; \
+ _name.vm_flags = _vm_flags; \
+ _name.regs = _regs; \
+ _name.fault_code = _fault_code
+
+int __weak arch_vma_check_access(struct vm_area_struct *vma,
+ struct vm_locked_fault *vmlf);
+
+int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret);
+
#else /* CONFIG_PER_VMA_LOCK */
static inline bool vma_start_read(struct vm_area_struct *vma)
@@ -742,6 +764,12 @@ static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline void vma_assert_write_locked(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma,
bool detached) {}
+#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags, _vm_flags, _regs, _fault_code)
+static inline int try_vma_locked_page_fault(struct vm_locked_fault *vmlf,
+ vm_fault_t *ret)
+{
+ return -EINVAL;
+}
static inline void release_fault_lock(struct vm_fault *vmf)
{
diff --git a/mm/memory.c b/mm/memory.c
index ad790394963a..d3f5d1270e7a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5449,6 +5449,48 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
count_vm_vma_lock_event(VMA_LOCK_ABORT);
return NULL;
}
+
+int __weak arch_vma_check_access(struct vm_area_struct *vma,
+ struct vm_locked_fault *vmlf)
+{
+ if (!(vma->vm_flags & vmlf->vm_flags))
+ return -EINVAL;
+ return 0;
+}
+
+int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
+{
+ struct vm_area_struct *vma;
+ vm_fault_t fault;
+
+ if (!(vmlf->fault_flags & FAULT_FLAG_USER))
+ return -EINVAL;
+
+ vma = lock_vma_under_rcu(vmlf->mm, vmlf->address);
+ if (!vma)
+ return -EINVAL;
+
+ if (arch_vma_check_access(vma, vmlf)) {
+ vma_end_read(vma);
+ return -EINVAL;
+ }
+
+ fault = handle_mm_fault(vma, vmlf->address,
+ vmlf->fault_flags | FAULT_FLAG_VMA_LOCK,
+ vmlf->regs);
+ *ret = fault;
+
+ if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+ vma_end_read(vma);
+
+ if ((fault & VM_FAULT_RETRY))
+ count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ else
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+
+ return 0;
+}
+
#endif /* CONFIG_PER_VMA_LOCK */
#ifndef __PAGETABLE_P4D_FOLDED
--
2.27.0
> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
> +{
> + struct vm_area_struct *vma;
> + vm_fault_t fault;
On Thu, Jul 13, 2023 at 05:53:29PM +0800, Kefeng Wang wrote:
> +#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags, _vm_flags, _regs, _fault_code) \
> + _name.mm = _mm; \
> + _name.address = _address; \
> + _name.fault_flags = _fault_flags; \
> + _name.vm_flags = _vm_flags; \
> + _name.regs = _regs; \
> + _name.fault_code = _fault_code
More consolidated code is a good idea; no question. But I don't think
this is the right way to do it.
> +int __weak arch_vma_check_access(struct vm_area_struct *vma,
> + struct vm_locked_fault *vmlf);
This should be:
#ifndef vma_check_access
bool vma_check_access(struct vm_area_struct *vma, )
{
return (vma->vm_flags & vm_flags) == 0;
}
#endif
and then arches which want to do something different can just define
vma_check_access.
> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
> +{
> + struct vm_area_struct *vma;
> + vm_fault_t fault;
Declaring the vmf in this function and then copying it back is just wrong.
We need to declare vm_fault_t earlier (in the arch fault handler) and
pass it in. I don't think that creating struct vm_locked_fault is the
right idea either.
> + if (!(vmlf->fault_flags & FAULT_FLAG_USER))
> + return -EINVAL;
> +
> + vma = lock_vma_under_rcu(vmlf->mm, vmlf->address);
> + if (!vma)
> + return -EINVAL;
> +
> + if (arch_vma_check_access(vma, vmlf)) {
> + vma_end_read(vma);
> + return -EINVAL;
> + }
> +
> + fault = handle_mm_fault(vma, vmlf->address,
> + vmlf->fault_flags | FAULT_FLAG_VMA_LOCK,
> + vmlf->regs);
> + *ret = fault;
> +
> + if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> + vma_end_read(vma);
> +
> + if ((fault & VM_FAULT_RETRY))
> + count_vm_vma_lock_event(VMA_LOCK_RETRY);
> + else
> + count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> +
> + return 0;
> +}
> +
> #endif /* CONFIG_PER_VMA_LOCK */
>
> #ifndef __PAGETABLE_P4D_FOLDED
> --
> 2.27.0
>
>
On Thu, Jul 13, 2023 at 9:15 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> > +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
> > +{
> > + struct vm_area_struct *vma;
> > + vm_fault_t fault;
>
>
> On Thu, Jul 13, 2023 at 05:53:29PM +0800, Kefeng Wang wrote:
> > +#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags, _vm_flags, _regs, _fault_code) \
> > + _name.mm = _mm; \
> > + _name.address = _address; \
> > + _name.fault_flags = _fault_flags; \
> > + _name.vm_flags = _vm_flags; \
> > + _name.regs = _regs; \
> > + _name.fault_code = _fault_code
>
> More consolidated code is a good idea; no question. But I don't think
> this is the right way to do it.
>
> > +int __weak arch_vma_check_access(struct vm_area_struct *vma,
> > + struct vm_locked_fault *vmlf);
>
> This should be:
>
> #ifndef vma_check_access
> bool vma_check_access(struct vm_area_struct *vma, )
> {
> return (vma->vm_flags & vm_flags) == 0;
> }
> #endif
>
> and then arches which want to do something different can just define
> vma_check_access.
>
> > +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
> > +{
> > + struct vm_area_struct *vma;
> > + vm_fault_t fault;
>
> Declaring the vmf in this function and then copying it back is just wrong.
> We need to declare vm_fault_t earlier (in the arch fault handler) and
> pass it in.
Did you mean to say "we need to declare vmf (struct vm_fault) earlier
(in the arch fault handler) and pass it in." ?
> I don't think that creating struct vm_locked_fault is the
> right idea either.
>
> > + if (!(vmlf->fault_flags & FAULT_FLAG_USER))
> > + return -EINVAL;
> > +
> > + vma = lock_vma_under_rcu(vmlf->mm, vmlf->address);
> > + if (!vma)
> > + return -EINVAL;
> > +
> > + if (arch_vma_check_access(vma, vmlf)) {
> > + vma_end_read(vma);
> > + return -EINVAL;
> > + }
> > +
> > + fault = handle_mm_fault(vma, vmlf->address,
> > + vmlf->fault_flags | FAULT_FLAG_VMA_LOCK,
> > + vmlf->regs);
> > + *ret = fault;
> > +
> > + if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> > + vma_end_read(vma);
> > +
> > + if ((fault & VM_FAULT_RETRY))
> > + count_vm_vma_lock_event(VMA_LOCK_RETRY);
> > + else
> > + count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> > +
> > + return 0;
> > +}
> > +
> > #endif /* CONFIG_PER_VMA_LOCK */
> >
> > #ifndef __PAGETABLE_P4D_FOLDED
> > --
> > 2.27.0
> >
> >
On 2023/7/14 4:12, Suren Baghdasaryan wrote:
> On Thu, Jul 13, 2023 at 9:15 AM Matthew Wilcox <willy@infradead.org> wrote:
>>
>>> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
>>> +{
>>> + struct vm_area_struct *vma;
>>> + vm_fault_t fault;
>>
>>
>> On Thu, Jul 13, 2023 at 05:53:29PM +0800, Kefeng Wang wrote:
>>> +#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags, _vm_flags, _regs, _fault_code) \
>>> + _name.mm = _mm; \
>>> + _name.address = _address; \
>>> + _name.fault_flags = _fault_flags; \
>>> + _name.vm_flags = _vm_flags; \
>>> + _name.regs = _regs; \
>>> + _name.fault_code = _fault_code
>>
>> More consolidated code is a good idea; no question. But I don't think
>> this is the right way to do it.
I agree it is not good enough, but each arch's vma access check has a
different implementation: some use vm flags, some need the fault code and
regs, and some use both :(
>>
>>> +int __weak arch_vma_check_access(struct vm_area_struct *vma,
>>> + struct vm_locked_fault *vmlf);
>>
>> This should be:
>>
>> #ifndef vma_check_access
>> bool vma_check_access(struct vm_area_struct *vma, )
>> {
>> return (vma->vm_flags & vm_flags) == 0;
>> }
>> #endif
>>
>> and then arches which want to do something different can just define
>> vma_check_access.
OK, I can convert it to work this way.
>>
>>> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
>>> +{
>>> + struct vm_area_struct *vma;
>>> + vm_fault_t fault;
>>
>> Declaring the vmf in this function and then copying it back is just wrong.
>> We need to declare vm_fault_t earlier (in the arch fault handler) and
>> pass it in.
Actually, I passed the vm_fault_t *ret (from the arch fault handler), so we
could use *ret directly instead of a new local variable, with no copy.
>
> Did you mean to say "we need to declare vmf (struct vm_fault) earlier
> (in the arch fault handler) and pass it in." ?
>
>> I don't think that creating struct vm_locked_fault is the
>> right idea either.
As mentioned above for the vma access check, the function needs many
arguments, so a new struct looks possibly better. Is there a better solution
or any suggestion?
Thanks.
>>
>>> + if (!(vmlf->fault_flags & FAULT_FLAG_USER))
>>> + return -EINVAL;
>>> +
>>> + vma = lock_vma_under_rcu(vmlf->mm, vmlf->address);
>>> + if (!vma)
>>> + return -EINVAL;
>>> +
>>> + if (arch_vma_check_access(vma, vmlf)) {
>>> + vma_end_read(vma);
>>> + return -EINVAL;
>>> + }
>>> +
>>> + fault = handle_mm_fault(vma, vmlf->address,
>>> + vmlf->fault_flags | FAULT_FLAG_VMA_LOCK,
>>> + vmlf->regs);
>>> + *ret = fault;
>>> +
>>> + if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>>> + vma_end_read(vma);
>>> +
>>> + if ((fault & VM_FAULT_RETRY))
>>> + count_vm_vma_lock_event(VMA_LOCK_RETRY);
>>> + else
>>> + count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> #endif /* CONFIG_PER_VMA_LOCK */
>>>
>>> #ifndef __PAGETABLE_P4D_FOLDED
>>> --
>>> 2.27.0
>>>
>>>
On 2023/7/14 9:52, Kefeng Wang wrote:
>
>
> On 2023/7/14 4:12, Suren Baghdasaryan wrote:
>> On Thu, Jul 13, 2023 at 9:15 AM Matthew Wilcox <willy@infradead.org>
>> wrote:
>>>
>>>> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf,
>>>> vm_fault_t *ret)
>>>> +{
>>>> + struct vm_area_struct *vma;
>>>> + vm_fault_t fault;
>>>
>>>
>>> On Thu, Jul 13, 2023 at 05:53:29PM +0800, Kefeng Wang wrote:
>>>> +#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags,
>>>> _vm_flags, _regs, _fault_code) \
>>>> + _name.mm = _mm; \
>>>> + _name.address = _address; \
>>>> + _name.fault_flags = _fault_flags; \
>>>> + _name.vm_flags = _vm_flags; \
>>>> + _name.regs = _regs; \
>>>> + _name.fault_code = _fault_code
>>>
>>> More consolidated code is a good idea; no question. But I don't think
>>> this is the right way to do it.
>
> I agree it is not good enough, but the arch's vma check acess has
> different implementation, some use vm flags, some need fault code and
> regs, and some use both :(
>
>>>
>>>> +int __weak arch_vma_check_access(struct vm_area_struct *vma,
>>>> + struct vm_locked_fault *vmlf);
>>>
>>> This should be:
>>>
>>> #ifndef vma_check_access
>>> bool vma_check_access(struct vm_area_struct *vma, )
>>> {
>>> return (vma->vm_flags & vm_flags) == 0;
>>> }
>>> #endif
>>>
>>> and then arches which want to do something different can just define
>>> vma_check_access.
>
> Ok, I could convert to use this way.
>
>>>
>>>> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf,
>>>> vm_fault_t *ret)
>>>> +{
>>>> + struct vm_area_struct *vma;
>>>> + vm_fault_t fault;
>>>
>>> Declaring the vmf in this function and then copying it back is just
>>> wrong.
>>> We need to declare vm_fault_t earlier (in the arch fault handler) and
>>> pass it in.
>
> Actually I passed the vm_fault_t *ret(in the arch fault handler), we
> could directly use *ret instead of a new local variable, and no copy.
>>
>> Did you mean to say "we need to declare vmf (struct vm_fault) earlier
>> (in the arch fault handler) and pass it in." ?
After rechecking the code, I think Matthew's idea is to 'declare vmf (struct
vm_fault) earlier' as Suren said, not vm_fault_t, right? I will try
this, thanks.
>>
>>> I don't think that creating struct vm_locked_fault is the
>>> right idea either.
>
> As mentioned above for vma check access, we need many arguments for a
> function, a new struct looks possible better, is there better solution
> or any suggestion?
>
> Thanks.
>
© 2016 - 2026 Red Hat, Inc.