From nobody Tue Dec 2 02:32:12 2025 Received: from out28-105.mail.aliyun.com (out28-105.mail.aliyun.com [115.124.28.105]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 68119329E72 for ; Wed, 19 Nov 2025 08:34:11 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=115.124.28.105 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763541259; cv=none; b=FfXCZh1i1LGJQIQkM5qIyoYU/egFXuh+X7XIQLgJqRoBXmjA60Woid6+aLEZna7dwCZ+l+okkhELMfedZSCJX/fAYwiY+grS3h/PFrWHwEtv0DcVKGn5BAMJnBYRcTazVv3rQC3SI9fEws6R+NB+VB+R5401snm3qArYXoHdEDQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763541259; c=relaxed/simple; bh=7zQGTXcVK9RkFxGLmyXKl1LVE31eTjD2ohTWl0o/xmg=; h=From:To:Cc:Subject:Date:Message-Id:MIME-Version; b=FXSEWt5toApr77n2I20omb3tnDHK4pcTJBl2ELTLlzRJ6AEQcXMgq3XyKPk8m/Qo8BH18H2jPHQyatpOqn6CoTUkJJieYbE/QDBHB5ll+Gdvk0efZWMvIVjMfkJM/y7u8RPz8YwvRgHgSaiWvekduPdgp0A5SHzEaZrbxOTcpkk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=bosc.ac.cn; spf=pass smtp.mailfrom=bosc.ac.cn; arc=none smtp.client-ip=115.124.28.105 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=bosc.ac.cn Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=bosc.ac.cn Received: from aliyun.com(mailfrom:zhangzhijie@bosc.ac.cn fp:SMTPD_---.fQ5Hg1C_1763540924 cluster:ay29) by smtp.aliyun-inc.com; Wed, 19 Nov 2025 16:28:45 +0800 From: zhangzhijie To: Hawking.Zhang@amd.com, zhangzhijie@bosc.ac.cn, wangran@bosc.ac.cn, zhangjian@bosc.ac.cn, alexander.deucher@amd.com, christian.koenig@amd.com, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org Cc: botton_zhang@163.com Subject: [PATCH v1] tests: Add test suite for double-checking userptr write validity and VRAM Date: Wed, 19 Nov 2025 16:28:41 +0800 
Message-Id: <20251119082841.1179938-1-zhangzhijie@bosc.ac.cn> X-Mailer: git-send-email 2.34.1 Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Userptr resides in host memory, and PCIe writes involve cache coherence. By using SDMA to copy GTT to VRAM and then verifying the values in VRAM, we= can validate GTT cache coherence. Bo(Userptr) ----> SDMA ---> Bo(userptr) ----sdma-----> VRAM Signed-off-by: zhangzhijie --- tests/amdgpu/basic_tests.c | 172 +++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c index 0e4a357b..223a9b0b 100644 --- a/tests/amdgpu/basic_tests.c +++ b/tests/amdgpu/basic_tests.c @@ -2061,12 +2061,184 @@ static void amdgpu_command_submission_sdma_copy_li= near(void) { amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); } +/* + * Write a pattern into a userptr BO with an SDMA WRITE packet, copy the + * start of that BO into a CPU-mapped VRAM BO with an SDMA COPY packet, + * then check that both the user memory and the VRAM mapping hold the + * pattern. sdma_write_length dwords are written and compared; + * sdma_copy_length is the size handed to the COPY packet. + */ +static void amdgpu_command_userptr_copy_to_vram_linear(void) +{ + int i, r, j; + uint32_t *pm4 =3D NULL; + uint64_t bo_mc; + void *ptr =3D NULL; + int pm4_dw =3D 256; + int sdma_write_length =3D 4; /* dwords stored by the WRITE packet */ + int sdma_copy_length =3D 1024; /* COPY count and VRAM BO size */ + amdgpu_bo_handle handle; + amdgpu_context_handle context_handle; + struct amdgpu_cs_ib_info *ib_info; + struct amdgpu_cs_request *ibs_request; + amdgpu_bo_handle buf_handle; + amdgpu_va_handle va_handle; + + amdgpu_bo_handle bo1; + amdgpu_bo_handle *resources; + uint64_t bo1_mc; + volatile unsigned char *bo1_cpu; + amdgpu_va_handle bo1_va_handle; + + + r =3D amdgpu_bo_alloc_and_map(device_handle, + sdma_copy_length, 4096, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &bo1, + (void**)&bo1_cpu, &bo1_mc, + &bo1_va_handle); + CU_ASSERT_EQUAL(r, 0); + /* poison the VRAM BO so stale contents cannot pass the check */ + memset((void*)bo1_cpu, 0xaa, sdma_copy_length); + + pm4 =3D calloc(pm4_dw, sizeof(*pm4)); + CU_ASSERT_NOT_EQUAL(pm4, NULL); + + ib_info =3D calloc(1, sizeof(*ib_info)); + CU_ASSERT_NOT_EQUAL(ib_info, 
NULL); + + ibs_request =3D calloc(1, sizeof(*ibs_request)); + CU_ASSERT_NOT_EQUAL(ibs_request, NULL); + + r =3D amdgpu_cs_ctx_create(device_handle, &context_handle); + CU_ASSERT_EQUAL(r, 0); + + r =3D posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); + CU_ASSERT_EQUAL(r, 0); + CU_ASSERT_NOT_EQUAL(ptr, NULL); + memset(ptr, 0, BUFFER_SIZE); + + r =3D amdgpu_create_bo_from_user_mem(device_handle, + ptr, BUFFER_SIZE, &buf_handle); + CU_ASSERT_EQUAL(r, 0); + + r =3D amdgpu_va_range_alloc(device_handle, + amdgpu_gpu_va_range_general, + BUFFER_SIZE, 1, 0, &bo_mc, + &va_handle, 0); + CU_ASSERT_EQUAL(r, 0); + + r =3D amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_= MAP); + CU_ASSERT_EQUAL(r, 0); + + handle =3D buf_handle; + + j =3D i =3D 0; + + /* IB 1: SDMA WRITE of the pattern into the userptr BO */ + if (family_id =3D=3D AMDGPU_FAMILY_SI) + pm4[i++] =3D SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, + sdma_write_length); + else + pm4[i++] =3D SDMA_PACKET(SDMA_OPCODE_WRITE, + SDMA_WRITE_SUB_OPCODE_LINEAR, 0); + pm4[i++] =3D 0xffffffff & bo_mc; + pm4[i++] =3D (0xffffffff00000000 & bo_mc) >> 32; + if (family_id >=3D AMDGPU_FAMILY_AI) + pm4[i++] =3D sdma_write_length - 1; + else if (family_id !=3D AMDGPU_FAMILY_SI) + pm4[i++] =3D sdma_write_length; + + while (j++ < sdma_write_length) + pm4[i++] =3D 0xdeadbeaf; + + /* + * The child only modifies its own copy of pm4[0] and leaves. _exit() + * keeps it from running atexit handlers registered by the parent. + */ + if (!fork()) { + pm4[0] =3D 0x0; + _exit(0); + } + + amdgpu_test_exec_cs_helper(context_handle, + AMDGPU_HW_IP_DMA, 0, + i, pm4, + 1, &handle, + ib_info, ibs_request); + + /* IB 2: SDMA COPY from the userptr BO into the VRAM BO */ + i =3D 0; + if (family_id =3D=3D AMDGPU_FAMILY_SI) { + pm4[i++] =3D + SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, sdma_copy_length); + pm4[i++] =3D 0xffffffff & bo1_mc; + pm4[i++] =3D 0xffffffff & bo_mc; + pm4[i++] =3D (0xffffffff00000000 & bo1_mc) >> 32; + pm4[i++] =3D (0xffffffff00000000 & bo_mc) >> 32; + } else { + pm4[i++] =3D + SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); + if (family_id >=3D AMDGPU_FAMILY_AI) + pm4[i++] =3D sdma_copy_length - 1; + else + pm4[i++] =3D sdma_copy_length; + pm4[i++] =3D 0; + 
pm4[i++] =3D 0xffffffff & bo_mc; + pm4[i++] =3D (0xffffffff00000000 & bo_mc) >> 32; + pm4[i++] =3D 0xffffffff & bo1_mc; + pm4[i++] =3D (0xffffffff00000000 & bo1_mc) >> 32; + } + /* both BOs are referenced by the copy IB */ + resources =3D calloc(2, sizeof(amdgpu_bo_handle)); + CU_ASSERT_NOT_EQUAL(resources, NULL); + + resources[0] =3D bo1; + resources[1] =3D handle; + amdgpu_test_exec_cs_helper(context_handle, + AMDGPU_HW_IP_DMA, 0, + i, pm4, + 2, resources, + ib_info, ibs_request); + + /* the user memory must hold the written pattern ... */ + i =3D 0; + while (i < sdma_write_length) { + CU_ASSERT_EQUAL(((uint32_t *)ptr)[i++], 0xdeadbeaf); + } + + /* ... and the VRAM mapping must hold the copied pattern */ + i =3D 0; + while (i < sdma_write_length) { + CU_ASSERT_EQUAL(((volatile uint32_t *)bo1_cpu)[i++], 0xdeadbeaf); + } + free(ibs_request); + free(ib_info); + free(pm4); + free(resources); + + r =3D amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_= UNMAP); + CU_ASSERT_EQUAL(r, 0); + r =3D amdgpu_va_range_free(va_handle); + CU_ASSERT_EQUAL(r, 0); + r =3D amdgpu_bo_free(buf_handle); + CU_ASSERT_EQUAL(r, 0); + free(ptr); + r =3D amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc, + sdma_copy_length); + CU_ASSERT_EQUAL(r, 0); + r =3D amdgpu_cs_ctx_free(context_handle); + CU_ASSERT_EQUAL(r, 0); + + wait(NULL); /* reap the child forked above */ +} =20 static void amdgpu_command_submission_sdma(void) { amdgpu_command_submission_sdma_write_linear(); amdgpu_command_submission_sdma_const_fill(); amdgpu_command_submission_sdma_copy_linear(); + amdgpu_command_userptr_copy_to_vram_linear(); } =20 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) --=20 2.34.1