Convert siw to use a vm_account structure so that pinned memory is
accounted to both the mm and the pins cgroup.
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Cc: Bernard Metzler <bmt@zurich.ibm.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: linux-rdma@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
drivers/infiniband/sw/siw/siw.h | 2 +-
drivers/infiniband/sw/siw/siw_mem.c | 20 ++++++--------------
drivers/infiniband/sw/siw/siw_verbs.c | 15 ---------------
3 files changed, 7 insertions(+), 30 deletions(-)
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 2f3a9cd..0c4a3ec 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -124,7 +124,7 @@ struct siw_umem {
int num_pages;
bool writable;
u64 fp_addr; /* First page base address */
- struct mm_struct *owning_mm;
+ struct vm_account vm_account;
};
struct siw_pble {
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index b2b33dd..9c53fc3 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -68,7 +68,6 @@ static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
void siw_umem_release(struct siw_umem *umem, bool dirty)
{
- struct mm_struct *mm_s = umem->owning_mm;
int i, num_pages = umem->num_pages;
for (i = 0; num_pages; i++) {
@@ -79,9 +78,9 @@ void siw_umem_release(struct siw_umem *umem, bool dirty)
kfree(umem->page_chunk[i].plist);
num_pages -= to_free;
}
- atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
+ vm_unaccount_pinned(&umem->vm_account, umem->num_pages);
+ vm_account_release(&umem->vm_account);
- mmdrop(mm_s);
kfree(umem->page_chunk);
kfree(umem);
}
@@ -365,9 +364,7 @@ struct siw_pbl *siw_pbl_alloc(u32 num_buf)
struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
{
struct siw_umem *umem;
- struct mm_struct *mm_s;
u64 first_page_va;
- unsigned long mlock_limit;
unsigned int foll_flags = FOLL_LONGTERM;
int num_pages, num_chunks, i, rv = 0;
@@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
if (!umem)
return ERR_PTR(-ENOMEM);
- mm_s = current->mm;
- umem->owning_mm = mm_s;
umem->writable = writable;
- mmgrab(mm_s);
+ vm_account_init_current(&umem->vm_account);
if (writable)
foll_flags |= FOLL_WRITE;
- mmap_read_lock(mm_s);
+ mmap_read_lock(current->mm);
- mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
- if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
+ if (vm_account_pinned(&umem->vm_account, num_pages)) {
rv = -ENOMEM;
goto out_sem_up;
}
@@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
goto out_sem_up;
umem->num_pages += rv;
- atomic64_add(rv, &mm_s->pinned_vm);
first_page_va += rv * PAGE_SIZE;
nents -= rv;
got += rv;
@@ -437,7 +429,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
num_pages -= got;
}
out_sem_up:
- mmap_read_unlock(mm_s);
+ mmap_read_unlock(current->mm);
if (rv > 0)
return umem;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 906fde1..8fab009 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1321,8 +1321,6 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
struct siw_umem *umem = NULL;
struct siw_ureq_reg_mr ureq;
struct siw_device *sdev = to_siw_dev(pd->device);
-
- unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK);
int rv;
siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
@@ -1338,19 +1336,6 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
rv = -EINVAL;
goto err_out;
}
- if (mem_limit != RLIM_INFINITY) {
- unsigned long num_pages =
- (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
- mem_limit >>= PAGE_SHIFT;
-
- if (num_pages > mem_limit - current->mm->locked_vm) {
- siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
- num_pages, mem_limit,
- current->mm->locked_vm);
- rv = -ENOMEM;
- goto err_out;
- }
- }
umem = siw_umem_get(start, len, ib_access_writable(rights));
if (IS_ERR(umem)) {
rv = PTR_ERR(umem);
--
git-series 0.9.1
On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
> @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
> if (!umem)
> return ERR_PTR(-ENOMEM);
>
> - mm_s = current->mm;
> - umem->owning_mm = mm_s;
> umem->writable = writable;
>
> - mmgrab(mm_s);
> + vm_account_init_current(&umem->vm_account);
>
> if (writable)
> foll_flags |= FOLL_WRITE;
>
> - mmap_read_lock(mm_s);
> + mmap_read_lock(current->mm);
>
> - mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> -
> - if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> + if (vm_account_pinned(&umem->vm_account, num_pages)) {
> rv = -ENOMEM;
> goto out_sem_up;
> }
> @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
> goto out_sem_up;
>
> umem->num_pages += rv;
> - atomic64_add(rv, &mm_s->pinned_vm);

Also fixes the race bug

Jason
> -----Original Message-----
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, 24 January 2023 15:37
> To: Alistair Popple <apopple@nvidia.com>
> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com; daniel@ffwll.ch;
> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
> linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> vm_account
>
> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
>
> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> > if (!umem)
> > return ERR_PTR(-ENOMEM);
> >
> > - mm_s = current->mm;
> > - umem->owning_mm = mm_s;
> > umem->writable = writable;
> >
> > - mmgrab(mm_s);
> > + vm_account_init_current(&umem->vm_account);
> >
> > if (writable)
> > foll_flags |= FOLL_WRITE;
> >
> > - mmap_read_lock(mm_s);
> > + mmap_read_lock(current->mm);
> >
> > - mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> > -
> > - if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> > + if (vm_account_pinned(&umem->vm_account, num_pages)) {
> > rv = -ENOMEM;
> > goto out_sem_up;
> > }
> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> > goto out_sem_up;
> >
> > umem->num_pages += rv;
> > - atomic64_add(rv, &mm_s->pinned_vm);
>
> Also fixes the race bug

But introduces another one. In that loop, umem->num_pages keeps the
number of pages currently pinned, not the target number. The current
patch uses that umem->num_pages to call vm_unaccount_pinned() in
siw_umem_release(). Bailing out before all pages are pinned would
mess up that accounting during release. Maybe introduce another
parameter to siw_umem_release(), or better have another umem member
'umem->num_pages_accounted' for correct accounting during release.

Bernard.

>
> Jason
Bernard Metzler <BMT@zurich.ibm.com> writes:

>> -----Original Message-----
>> From: Jason Gunthorpe <jgg@nvidia.com>
>> Sent: Tuesday, 24 January 2023 15:37
>> To: Alistair Popple <apopple@nvidia.com>
>> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
>> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
>> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com; daniel@ffwll.ch;
>> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
>> linux-rdma@vger.kernel.org
>> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
>> vm_account
>>
>> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
>>
>> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
>> bool writable)
>> > if (!umem)
>> > return ERR_PTR(-ENOMEM);
>> >
>> > - mm_s = current->mm;
>> > - umem->owning_mm = mm_s;
>> > umem->writable = writable;
>> >
>> > - mmgrab(mm_s);
>> > + vm_account_init_current(&umem->vm_account);
>> >
>> > if (writable)
>> > foll_flags |= FOLL_WRITE;
>> >
>> > - mmap_read_lock(mm_s);
>> > + mmap_read_lock(current->mm);
>> >
>> > - mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>> > -
>> > - if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
>> > + if (vm_account_pinned(&umem->vm_account, num_pages)) {
>> > rv = -ENOMEM;
>> > goto out_sem_up;
>> > }
>> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
>> bool writable)
>> > goto out_sem_up;
>> >
>> > umem->num_pages += rv;
>> > - atomic64_add(rv, &mm_s->pinned_vm);
>>
>> Also fixes the race bug
>
> But introduces another one. In that loop, umem->num_pages keeps the
> number of pages currently pinned, not the target number. The current
> patch uses that umem->num_pages to call vm_unaccount_pinned() in
> siw_umem_release(). Bailing out before all pages are pinned would
> mess up that accounting during release. Maybe introduce another
> parameter to siw_umem_release(), or better have another umem member
> 'umem->num_pages_accounted' for correct accounting during release.

Yes, I see the problem thanks for pointing it out. Will fix for the next
version.

> Bernard.
>>
>> Jason
> -----Original Message-----
> From: Alistair Popple <apopple@nvidia.com>
> Sent: Monday, 30 January 2023 12:35
> To: Bernard Metzler <BMT@zurich.ibm.com>
> Cc: Jason Gunthorpe <jgg@nvidia.com>; linux-mm@kvack.org;
> cgroups@vger.kernel.org; linux-kernel@vger.kernel.org; jhubbard@nvidia.com;
> tjmercier@google.com; hannes@cmpxchg.org; surenb@google.com;
> mkoutny@suse.com; daniel@ffwll.ch; Leon Romanovsky <leon@kernel.org>;
> linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> vm_account
>
>
> Bernard Metzler <BMT@zurich.ibm.com> writes:
>
> >> -----Original Message-----
> >> From: Jason Gunthorpe <jgg@nvidia.com>
> >> Sent: Tuesday, 24 January 2023 15:37
> >> To: Alistair Popple <apopple@nvidia.com>
> >> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
> >> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
> >> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com;
> daniel@ffwll.ch;
> >> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
> >> linux-rdma@vger.kernel.org
> >> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> >> vm_account
> >>
> >> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
> >>
> >> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64
> len,
> >> bool writable)
> >> > if (!umem)
> >> > return ERR_PTR(-ENOMEM);
> >> >
> >> > - mm_s = current->mm;
> >> > - umem->owning_mm = mm_s;
> >> > umem->writable = writable;
> >> >
> >> > - mmgrab(mm_s);
> >> > + vm_account_init_current(&umem->vm_account);
> >> >
> >> > if (writable)
> >> > foll_flags |= FOLL_WRITE;
> >> >
> >> > - mmap_read_lock(mm_s);
> >> > + mmap_read_lock(current->mm);
> >> >
> >> > - mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> >> > -
> >> > - if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> >> > + if (vm_account_pinned(&umem->vm_account, num_pages)) {
> >> > rv = -ENOMEM;
> >> > goto out_sem_up;
> >> > }
> >> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> >> bool writable)
> >> > goto out_sem_up;
> >> >
> >> > umem->num_pages += rv;
> >> > - atomic64_add(rv, &mm_s->pinned_vm);
> >>
> >> Also fixes the race bug
> >
> > But introduces another one. In that loop, umem->num_pages keeps the
> > number of pages currently pinned, not the target number. The current
> > patch uses that umem->num_pages to call vm_unaccount_pinned() in
> > siw_umem_release(). Bailing out before all pages are pinned would
> > mess up that accounting during release. Maybe introduce another
> > parameter to siw_umem_release(), or better have another umem member
> > 'umem->num_pages_accounted' for correct accounting during release.
>
> Yes, I see the problem thanks for pointing it out. Will fix for the next
> version.

Thank you! Let me send a patch to the original code,
just checking if not all pages are pinned and fix the
counter accordingly. Maybe you can go from there..?

Thank you,
Bernard.
> -----Original Message-----
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, 24 January 2023 15:37
> To: Alistair Popple <apopple@nvidia.com>
> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com; daniel@ffwll.ch;
> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
> linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> vm_account
>
> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
>
> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> > if (!umem)
> > return ERR_PTR(-ENOMEM);
> >
> > - mm_s = current->mm;
> > - umem->owning_mm = mm_s;
> > umem->writable = writable;
> >
> > - mmgrab(mm_s);
> > + vm_account_init_current(&umem->vm_account);
> >
> > if (writable)
> > foll_flags |= FOLL_WRITE;
> >
> > - mmap_read_lock(mm_s);
> > + mmap_read_lock(current->mm);
> >
> > - mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> > -
> > - if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> > + if (vm_account_pinned(&umem->vm_account, num_pages)) {
> > rv = -ENOMEM;
> > goto out_sem_up;
> > }
> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> > goto out_sem_up;
> >
> > umem->num_pages += rv;
> > - atomic64_add(rv, &mm_s->pinned_vm);
>
> Also fixes the race bug
>

True! Should have used atomic64_add_return() in the first place...

Bernard.
© 2016 - 2025 Red Hat, Inc.