[PATCH v4 7/8] drm/amdkfd: Unify userptr cleanup and update paths

Honglei Huang posted 8 patches 9 hours ago
[PATCH v4 7/8] drm/amdkfd: Unify userptr cleanup and update paths
Posted by Honglei Huang 9 hours ago
From: Honglei Huang <honghuan@amd.com>

Refactor userptr management code to handle both single and batch
allocations uniformly.

This adds:
- cleanup_userptr_resources(): unified cleanup for single and batch mode
- discard_user_pages_batch(): discard the pages of invalidated batch ranges
- amdgpu_amdkfd_update_user_pages_batch(): re-fetch the pages of
  invalidated batch ranges
- valid_user_pages_batch(): check that every batch range is still valid

Modified functions to support batch mode:
- update_invalid_user_pages(): uses batch update when applicable
- confirm_valid_user_pages_locked(): checks batch validity
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(): uses unified cleanup

Signed-off-by: Honglei Huang <honghuan@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 158 ++++++++++++++++--
 1 file changed, 141 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index c2fc31964..7233b127b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2281,6 +2281,35 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu_batch(
 	return ret;
 }
 
+/* Release userptr tracking for @mem; a no-op if the BO has no user mm. Batch
+ * mode stops the notifier first so no callback runs during tree teardown.
+ */
+static void cleanup_userptr_resources(struct kgd_mem *mem,
+				      struct amdkfd_process_info *process_info)
+{
+	uint32_t i;
+
+	if (!amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm))
+		return;
+	if (!(mem->num_user_ranges > 0 && mem->user_ranges)) {
+		amdgpu_hmm_unregister(mem->bo);
+		mutex_lock(&process_info->notifier_lock);
+		amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range);
+		mutex_unlock(&process_info->notifier_lock);
+		return;
+	}
+
+	if (mem->batch_notifier.mm) {
+		mmu_interval_notifier_remove(&mem->batch_notifier);
+		mem->batch_notifier.mm = NULL;
+	}
+	for (i = 0; i < mem->num_user_ranges; i++)
+		interval_tree_remove(&mem->user_ranges[i].it_node, &mem->user_ranges_itree);
+	kvfree(mem->user_ranges);
+	mem->user_ranges = NULL;
+	mem->num_user_ranges = 0;
+}
+
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
 		uint64_t *size)
@@ -2322,12 +2351,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	mutex_unlock(&process_info->lock);
 
 	/* Cleanup user pages and MMU notifiers */
-	if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
-		amdgpu_hmm_unregister(mem->bo);
-		mutex_lock(&process_info->notifier_lock);
-		amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range);
-		mutex_unlock(&process_info->notifier_lock);
-	}
+	cleanup_userptr_resources(mem, process_info);
 
 	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
 	if (unlikely(ret))
@@ -2914,6 +2938,51 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
 	return r;
 }
 
+/* Discard and clear the attached range of every batch entry of @mem not marked valid. */
+static void discard_user_pages_batch(struct amdgpu_bo *bo, struct kgd_mem *mem)
+{
+	uint32_t idx;
+
+	for (idx = 0; idx < mem->num_user_ranges; idx++) {
+		if (mem->user_ranges[idx].valid || !mem->user_ranges[idx].range)
+			continue;
+		amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->user_ranges[idx].range);
+		mem->user_ranges[idx].range = NULL;
+	}
+}
+
+/* Re-fetch pages for every batch range of @mem not marked valid, under an mm
+ * reference and the mmap read lock; stops at the first failing range.
+ * Returns 0, -ESRCH if @mm cannot be referenced, else that range's error.
+ * NOTE(review): ->valid is written without notifier_lock here - confirm.
+ */
+static int amdgpu_amdkfd_update_user_pages_batch(struct mm_struct *mm,
+						 struct amdgpu_bo *bo,
+						 struct kgd_mem *mem)
+{
+	int err = 0;
+	uint32_t n;
+
+	if (!mmget_not_zero(mm))
+		return -ESRCH;
+
+	mmap_read_lock(mm);
+	for (n = 0; !err && n < mem->num_user_ranges; n++) {
+		if (mem->user_ranges[n].valid)
+			continue;
+		err = get_user_pages_batch_locked(mm, mem, &mem->user_ranges[n],
+			&mem->user_ranges[n].range, amdgpu_ttm_tt_is_readonly(bo->tbo.ttm));
+		if (!err)
+			mem->user_ranges[n].valid = true;
+		else
+			pr_debug("Failed %d to get user pages for range %u\n", err, n);
+	}
+	mmap_read_unlock(mm);
+	mmput(mm);
+
+	return err;
+}
+
 /* Update invalid userptr BOs
  *
  * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
@@ -2928,6 +2997,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 	struct ttm_operation_ctx ctx = { false, false };
 	uint32_t invalid;
 	int ret = 0;
+	uint32_t i;
 
 	mutex_lock(&process_info->notifier_lock);
 
@@ -2951,8 +3021,12 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 
 		bo = mem->bo;
 
-		amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range);
-		mem->range = NULL;
+		if (mem->num_user_ranges > 0 && mem->user_ranges)
+			discard_user_pages_batch(bo, mem);
+		else {
+			amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range);
+			mem->range = NULL;
+		}
 
 		/* BO reservations and getting user pages (hmm_range_fault)
 		 * must happen outside the notifier lock
@@ -2976,7 +3050,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 		}
 
 		/* Get updated user pages */
-		ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range);
+		if (mem->num_user_ranges > 0 && mem->user_ranges)
+			ret = amdgpu_amdkfd_update_user_pages_batch(mm, bo, mem);
+		else
+			ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range);
+
 		if (ret) {
 			pr_debug("Failed %d to get user pages\n", ret);
 
@@ -3010,7 +3088,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 			ret = 0;
 		}
 
-		amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
+		if (mem->num_user_ranges == 0)
+			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
+		else
+			set_user_pages_batch(bo->tbo.ttm, mem->user_ranges, mem->num_user_ranges);
 
 		mutex_lock(&process_info->notifier_lock);
 
@@ -3024,6 +3105,17 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 		 /* set mem valid if mem has hmm range associated */
 		if (mem->range)
 			mem->invalid = 0;
+
+		/* For batch mode, clear global invalid counter and mark ranges as valid.
+		 * Individual range validity is tracked by valid flag.
+		 */
+		if (mem->num_user_ranges > 0 && mem->user_ranges) {
+			mem->invalid = 0;
+			for (i = 0; i < mem->num_user_ranges; i++) {
+				if (mem->user_ranges[i].range)
+					mem->user_ranges[i].valid = true;
+			}
+		}
 	}
 
 unlock_out:
@@ -3131,6 +3223,33 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 	return ret;
 }
 
+/* Return true only if every batch range of @mem is marked valid and, where a
+ * range is still attached, amdgpu_ttm_tt_get_user_pages_done() returns true
+ * for it; such a range pointer is cleared after the check. A mem without
+ * batch ranges is reported as valid.
+ */
+static bool valid_user_pages_batch(struct kgd_mem *mem)
+{
+	bool ok = true;
+	uint32_t r;
+
+	if (!mem->user_ranges || mem->num_user_ranges == 0)
+		return true;
+
+	for (r = 0; r < mem->num_user_ranges; r++) {
+		if (!mem->user_ranges[r].valid) {
+			ok = false;
+		} else if (mem->user_ranges[r].range) {
+			if (!amdgpu_ttm_tt_get_user_pages_done(mem->bo->tbo.ttm,
+							mem->user_ranges[r].range))
+				ok = false;
+			mem->user_ranges[r].range = NULL;
+		}
+	}
+
+	return ok;
+}
+
 /* Confirm that all user pages are valid while holding the notifier lock
  *
  * Moves valid BOs from the userptr_inval_list back to userptr_val_list.
@@ -3145,15 +3264,20 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
 				 validate_list) {
 		bool valid;
 
-		/* keep mem without hmm range at userptr_inval_list */
-		if (!mem->range)
-			continue;
+		if (mem->num_user_ranges > 0 && mem->user_ranges)
+			valid = valid_user_pages_batch(mem);
+		else {
+			/* keep mem without hmm range at userptr_inval_list */
+			if (!mem->range)
+				continue;
 
-		/* Only check mem with hmm range associated */
-		valid = amdgpu_ttm_tt_get_user_pages_done(
-					mem->bo->tbo.ttm, mem->range);
+			/* Only check mem with hmm range associated */
+			valid = amdgpu_ttm_tt_get_user_pages_done(
+						mem->bo->tbo.ttm, mem->range);
+
+			mem->range = NULL;
+		}
 
-		mem->range = NULL;
 		if (!valid) {
 			WARN(!mem->invalid, "Invalid BO not marked invalid");
 			ret = -EAGAIN;
-- 
2.34.1