[RFC PATCH 05/19] RDMA/siw: Convert to use vm_account

Alistair Popple posted 19 patches 2 years, 7 months ago
There is a newer version of this series
[RFC PATCH 05/19] RDMA/siw: Convert to use vm_account
Posted by Alistair Popple 2 years, 7 months ago
Convert siw to use a vm_account structure so that pinned memory is
accounted to both the mm and the pins cgroup.

Signed-off-by: Alistair Popple <apopple@nvidia.com>
Cc: Bernard Metzler <bmt@zurich.ibm.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: linux-rdma@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 drivers/infiniband/sw/siw/siw.h       |  2 +-
 drivers/infiniband/sw/siw/siw_mem.c   | 20 ++++++--------------
 drivers/infiniband/sw/siw/siw_verbs.c | 15 ---------------
 3 files changed, 7 insertions(+), 30 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 2f3a9cd..0c4a3ec 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -124,7 +124,7 @@ struct siw_umem {
 	int num_pages;
 	bool writable;
 	u64 fp_addr; /* First page base address */
-	struct mm_struct *owning_mm;
+	struct vm_account vm_account;
 };
 
 struct siw_pble {
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index b2b33dd..9c53fc3 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -68,7 +68,6 @@ static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
 
 void siw_umem_release(struct siw_umem *umem, bool dirty)
 {
-	struct mm_struct *mm_s = umem->owning_mm;
 	int i, num_pages = umem->num_pages;
 
 	for (i = 0; num_pages; i++) {
@@ -79,9 +78,9 @@ void siw_umem_release(struct siw_umem *umem, bool dirty)
 		kfree(umem->page_chunk[i].plist);
 		num_pages -= to_free;
 	}
-	atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
+	vm_unaccount_pinned(&umem->vm_account, umem->num_pages);
+	vm_account_release(&umem->vm_account);
 
-	mmdrop(mm_s);
 	kfree(umem->page_chunk);
 	kfree(umem);
 }
@@ -365,9 +364,7 @@ struct siw_pbl *siw_pbl_alloc(u32 num_buf)
 struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 {
 	struct siw_umem *umem;
-	struct mm_struct *mm_s;
 	u64 first_page_va;
-	unsigned long mlock_limit;
 	unsigned int foll_flags = FOLL_LONGTERM;
 	int num_pages, num_chunks, i, rv = 0;
 
@@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 	if (!umem)
 		return ERR_PTR(-ENOMEM);
 
-	mm_s = current->mm;
-	umem->owning_mm = mm_s;
 	umem->writable = writable;
 
-	mmgrab(mm_s);
+	vm_account_init_current(&umem->vm_account);
 
 	if (writable)
 		foll_flags |= FOLL_WRITE;
 
-	mmap_read_lock(mm_s);
+	mmap_read_lock(current->mm);
 
-	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
+	if (vm_account_pinned(&umem->vm_account, num_pages)) {
 		rv = -ENOMEM;
 		goto out_sem_up;
 	}
@@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 				goto out_sem_up;
 
 			umem->num_pages += rv;
-			atomic64_add(rv, &mm_s->pinned_vm);
 			first_page_va += rv * PAGE_SIZE;
 			nents -= rv;
 			got += rv;
@@ -437,7 +429,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 		num_pages -= got;
 	}
 out_sem_up:
-	mmap_read_unlock(mm_s);
+	mmap_read_unlock(current->mm);
 
 	if (rv > 0)
 		return umem;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 906fde1..8fab009 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1321,8 +1321,6 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
 	struct siw_umem *umem = NULL;
 	struct siw_ureq_reg_mr ureq;
 	struct siw_device *sdev = to_siw_dev(pd->device);
-
-	unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK);
 	int rv;
 
 	siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
@@ -1338,19 +1336,6 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
 		rv = -EINVAL;
 		goto err_out;
 	}
-	if (mem_limit != RLIM_INFINITY) {
-		unsigned long num_pages =
-			(PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
-		mem_limit >>= PAGE_SHIFT;
-
-		if (num_pages > mem_limit - current->mm->locked_vm) {
-			siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
-				   num_pages, mem_limit,
-				   current->mm->locked_vm);
-			rv = -ENOMEM;
-			goto err_out;
-		}
-	}
 	umem = siw_umem_get(start, len, ib_access_writable(rights));
 	if (IS_ERR(umem)) {
 		rv = PTR_ERR(umem);
-- 
git-series 0.9.1
Re: [RFC PATCH 05/19] RDMA/siw: Convert to use vm_account
Posted by Jason Gunthorpe 2 years, 7 months ago
On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:

> @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
>  	if (!umem)
>  		return ERR_PTR(-ENOMEM);
>  
> -	mm_s = current->mm;
> -	umem->owning_mm = mm_s;
>  	umem->writable = writable;
>  
> -	mmgrab(mm_s);
> +	vm_account_init_current(&umem->vm_account);
>  
>  	if (writable)
>  		foll_flags |= FOLL_WRITE;
>  
> -	mmap_read_lock(mm_s);
> +	mmap_read_lock(current->mm);
>  
> -	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> -
> -	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> +	if (vm_account_pinned(&umem->vm_account, num_pages)) {
>  		rv = -ENOMEM;
>  		goto out_sem_up;
>  	}
> @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
>  				goto out_sem_up;
>  
>  			umem->num_pages += rv;
> -			atomic64_add(rv, &mm_s->pinned_vm);

Also fixes the race bug

Jason
RE: [RFC PATCH 05/19] RDMA/siw: Convert to use vm_account
Posted by Bernard Metzler 2 years, 7 months ago

> -----Original Message-----
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, 24 January 2023 15:37
> To: Alistair Popple <apopple@nvidia.com>
> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com; daniel@ffwll.ch;
> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
> linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> vm_account
> 
> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
> 
> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> >  	if (!umem)
> >  		return ERR_PTR(-ENOMEM);
> >
> > -	mm_s = current->mm;
> > -	umem->owning_mm = mm_s;
> >  	umem->writable = writable;
> >
> > -	mmgrab(mm_s);
> > +	vm_account_init_current(&umem->vm_account);
> >
> >  	if (writable)
> >  		foll_flags |= FOLL_WRITE;
> >
> > -	mmap_read_lock(mm_s);
> > +	mmap_read_lock(current->mm);
> >
> > -	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> > -
> > -	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> > +	if (vm_account_pinned(&umem->vm_account, num_pages)) {
> >  		rv = -ENOMEM;
> >  		goto out_sem_up;
> >  	}
> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> >  				goto out_sem_up;
> >
> >  			umem->num_pages += rv;
> > -			atomic64_add(rv, &mm_s->pinned_vm);
> 
> Also fixes the race bug

But it introduces another one. In that loop, umem->num_pages holds the
number of pages pinned so far, not the target number. The current
patch charges the full target up front via vm_account_pinned(), but
passes umem->num_pages to vm_unaccount_pinned() in siw_umem_release().
Bailing out before all pages are pinned would therefore unaccount fewer
pages than were charged and mess up the accounting during release.
Maybe introduce another parameter to siw_umem_release() or, better,
add another umem member 'umem->num_pages_accounted' for correct
accounting during release.

Bernard.
> 
> Jason
Re: [RFC PATCH 05/19] RDMA/siw: Convert to use vm_account
Posted by Alistair Popple 2 years, 7 months ago
Bernard Metzler <BMT@zurich.ibm.com> writes:

>> -----Original Message-----
>> From: Jason Gunthorpe <jgg@nvidia.com>
>> Sent: Tuesday, 24 January 2023 15:37
>> To: Alistair Popple <apopple@nvidia.com>
>> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
>> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
>> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com; daniel@ffwll.ch;
>> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
>> linux-rdma@vger.kernel.org
>> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
>> vm_account
>> 
>> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
>> 
>> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
>> bool writable)
>> >  	if (!umem)
>> >  		return ERR_PTR(-ENOMEM);
>> >
>> > -	mm_s = current->mm;
>> > -	umem->owning_mm = mm_s;
>> >  	umem->writable = writable;
>> >
>> > -	mmgrab(mm_s);
>> > +	vm_account_init_current(&umem->vm_account);
>> >
>> >  	if (writable)
>> >  		foll_flags |= FOLL_WRITE;
>> >
>> > -	mmap_read_lock(mm_s);
>> > +	mmap_read_lock(current->mm);
>> >
>> > -	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>> > -
>> > -	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
>> > +	if (vm_account_pinned(&umem->vm_account, num_pages)) {
>> >  		rv = -ENOMEM;
>> >  		goto out_sem_up;
>> >  	}
>> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
>> bool writable)
>> >  				goto out_sem_up;
>> >
>> >  			umem->num_pages += rv;
>> > -			atomic64_add(rv, &mm_s->pinned_vm);
>> 
>> Also fixes the race bug
>
> But introduces another one. In that loop, umem->num_pages keeps the
> number of pages currently pinned, not the target number. The current
> patch uses that umem->num_pages to call vm_unaccount_pinned() in
> siw_umem_release(). Bailing out before all pages are pinned would
> mess up that accounting during release. Maybe introduce another
> parameter to siw_umem_release(), or better have another umem member
> 'umem->num_pages_accounted' for correct accounting during release.

Yes, I see the problem; thanks for pointing it out. I will fix it in the
next version.

> Bernard.
>> 
>> Jason
RE: Re: [RFC PATCH 05/19] RDMA/siw: Convert to use vm_account
Posted by Bernard Metzler 2 years, 7 months ago

> -----Original Message-----
> From: Alistair Popple <apopple@nvidia.com>
> Sent: Monday, 30 January 2023 12:35
> To: Bernard Metzler <BMT@zurich.ibm.com>
> Cc: Jason Gunthorpe <jgg@nvidia.com>; linux-mm@kvack.org;
> cgroups@vger.kernel.org; linux-kernel@vger.kernel.org; jhubbard@nvidia.com;
> tjmercier@google.com; hannes@cmpxchg.org; surenb@google.com;
> mkoutny@suse.com; daniel@ffwll.ch; Leon Romanovsky <leon@kernel.org>;
> linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> vm_account
> 
> 
> Bernard Metzler <BMT@zurich.ibm.com> writes:
> 
> >> -----Original Message-----
> >> From: Jason Gunthorpe <jgg@nvidia.com>
> >> Sent: Tuesday, 24 January 2023 15:37
> >> To: Alistair Popple <apopple@nvidia.com>
> >> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
> >> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
> >> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com;
> daniel@ffwll.ch;
> >> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
> >> linux-rdma@vger.kernel.org
> >> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> >> vm_account
> >>
> >> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
> >>
> >> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64
> len,
> >> bool writable)
> >> >  	if (!umem)
> >> >  		return ERR_PTR(-ENOMEM);
> >> >
> >> > -	mm_s = current->mm;
> >> > -	umem->owning_mm = mm_s;
> >> >  	umem->writable = writable;
> >> >
> >> > -	mmgrab(mm_s);
> >> > +	vm_account_init_current(&umem->vm_account);
> >> >
> >> >  	if (writable)
> >> >  		foll_flags |= FOLL_WRITE;
> >> >
> >> > -	mmap_read_lock(mm_s);
> >> > +	mmap_read_lock(current->mm);
> >> >
> >> > -	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> >> > -
> >> > -	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> >> > +	if (vm_account_pinned(&umem->vm_account, num_pages)) {
> >> >  		rv = -ENOMEM;
> >> >  		goto out_sem_up;
> >> >  	}
> >> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> >> bool writable)
> >> >  				goto out_sem_up;
> >> >
> >> >  			umem->num_pages += rv;
> >> > -			atomic64_add(rv, &mm_s->pinned_vm);
> >>
> >> Also fixes the race bug
> >
> > But introduces another one. In that loop, umem->num_pages keeps the
> > number of pages currently pinned, not the target number. The current
> > patch uses that umem->num_pages to call vm_unaccount_pinned() in
> > siw_umem_release(). Bailing out before all pages are pinned would
> > mess up that accounting during release. Maybe introduce another
> > parameter to siw_umem_release(), or better have another umem member
> > 'umem->num_pages_accounted' for correct accounting during release.
> 
> Yes, I see the problem thanks for pointing it out. Will fix for the next
> version.

Thank you! Let me send a patch against the original code that
just checks whether all pages were pinned and fixes the
counter accordingly. Maybe you can go from there?

Thank you,
Bernard.

RE: [RFC PATCH 05/19] RDMA/siw: Convert to use vm_account
Posted by Bernard Metzler 2 years, 7 months ago

> -----Original Message-----
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, 24 January 2023 15:37
> To: Alistair Popple <apopple@nvidia.com>
> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com; daniel@ffwll.ch;
> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
> linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> vm_account
> 
> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
> 
> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> >  	if (!umem)
> >  		return ERR_PTR(-ENOMEM);
> >
> > -	mm_s = current->mm;
> > -	umem->owning_mm = mm_s;
> >  	umem->writable = writable;
> >
> > -	mmgrab(mm_s);
> > +	vm_account_init_current(&umem->vm_account);
> >
> >  	if (writable)
> >  		foll_flags |= FOLL_WRITE;
> >
> > -	mmap_read_lock(mm_s);
> > +	mmap_read_lock(current->mm);
> >
> > -	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> > -
> > -	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> > +	if (vm_account_pinned(&umem->vm_account, num_pages)) {
> >  		rv = -ENOMEM;
> >  		goto out_sem_up;
> >  	}
> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> >  				goto out_sem_up;
> >
> >  			umem->num_pages += rv;
> > -			atomic64_add(rv, &mm_s->pinned_vm);
> 
> Also fixes the race bug
> 
True! I should have used atomic64_add_return() in the first place...

Bernard.