From nobody Thu Apr 9 12:08:22 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1354B3A9D8A; Mon, 9 Mar 2026 16:35:19 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1773074119; cv=none; b=FpW3ftCdADHUfVRi1txs5H3AK134YjI/m8rumOZGFcYAjPqKufkkaFlV1ZP+QFVvchKdZNJHNQzcxwMYbi++09d+0Jc1UQHLM5svao4epYV5CYI4cXPEvKLWWvw6W7XVwlS88DMGk5UtFBQWJzXuUEmK0DoCQuvV1ZlEfbi7R3E= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1773074119; c=relaxed/simple; bh=fssZhaE39h8/ifGb5lyKDHlx9SyrHqGfVID8VjmY0ME=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=AaVgx4Wp8NeWhrHizVG7oGkejqq7M2iawmhG7jQj6PwZRwFp6ImLy+4buVtI6oJCsNjNb/f24TlbS95M/MUN8263SU/wGsq0MGix9uMBHR0GY4P7lQeWQk8DtioD7bv6H1IrySEFt45Z/lfv6OD0XsiU4wXn3/pt8e9uZC43OOs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=nE9CPFES; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="nE9CPFES" Received: by smtp.kernel.org (Postfix) with ESMTPS id C224BC2BCB7; Mon, 9 Mar 2026 16:35:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1773074118; bh=fssZhaE39h8/ifGb5lyKDHlx9SyrHqGfVID8VjmY0ME=; h=From:Date:Subject:References:In-Reply-To:To:Cc:Reply-To:From; b=nE9CPFESeIElYpdFRVFYi/eDY9RoazP12jcTKXLiS3P7dRa9zphKtGh1pPAm0qMUf Si1w3fT84m6mtNoKkiABeBnJPXnSSW1pcCw5+SieCJkAQsC0XZRTZbvXQhkysdgEm9 B56/DvoqOv6oxIeGkLvbO/i/fL6vA8SyOZbjKIf6Ub6oMWmKpiG3trmrmtJGoc+9ut 
oISHSJI+QPG7Oi1wFCM0b/vDyzjppoceO75Z3Y3wZExWRhgsBxLQdEUmL3sEVDuhK9 Xmb/yGfDEC15+YoOym7ExlwwmTmLi825LE4F0VxIHZet28u3jXNL+ZEPYjOhdEi7rC oLdNDkjd3nh2g== Received: from aws-us-west-2-korg-lkml-1.web.codeaurora.org (localhost.localdomain [127.0.0.1]) by smtp.lore.kernel.org (Postfix) with ESMTP id B1174F41812; Mon, 9 Mar 2026 16:35:18 +0000 (UTC) From: Tim Kovalenko via B4 Relay Date: Mon, 09 Mar 2026 12:34:21 -0400 Subject: [PATCH v4 4/4] gpu: nova-core: fix stack overflow in GSP memory allocation Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me> References: <20260309-drm-rust-next-v4-0-4ef485b19a4c@proton.me> In-Reply-To: <20260309-drm-rust-next-v4-0-4ef485b19a4c@proton.me> To: Alexandre Courbot , Danilo Krummrich , Alice Ryhl , David Airlie , Simona Vetter , Miguel Ojeda , Gary Guo , =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= , Benno Lossin , Andreas Hindborg , Trevor Gross , Boqun Feng , Nathan Chancellor , Nicolas Schier , Abdiel Janulgue , Daniel Almeida , Robin Murphy , Boqun Feng Cc: nouveau@lists.freedesktop.org, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org, linux-kbuild@vger.kernel.org, driver-core@lists.linux.dev, Tim Kovalenko X-Mailer: b4 0.14.2 X-Developer-Signature: v=1; a=ed25519-sha256; t=1773074117; l=3986; i=tim.kovalenko@proton.me; s=20260212; h=from:subject:message-id; bh=02lIiBKVUWPZgiHflont6ppBx+2QAZbO4PzXiivsIkE=; b=pE0ZKAo+0q+3X2PamEznSFPbbQiSdIw7Xy/EgQG+Zfy/iiwn0Rb6Om2UKZsCeEXyeudQjTtkF +jL/j8+uX6ICxdDQJsEfm++qZt+CwLBc7cGDTtVkdNtnXo/EFKH2KKd X-Developer-Key: i=tim.kovalenko@proton.me; a=ed25519; pk=/+OiulEpgeZifgP4mDE4e5YlV6nMeY+frze/lY/xiHI= X-Endpoint-Received: by B4 Relay for tim.kovalenko@proton.me/20260212 with auth_id=635 X-Original-From: Tim Kovalenko Reply-To: 
tim.kovalenko@proton.me From: Tim Kovalenko The `Cmdq::new` function allocated a `PteArray` struct on the stack, which caused a stack overflow at 8216 bytes of stack usage. Modify `PteArray` to calculate the Page Table Entries and write them directly into the coherent DMA buffer one by one, so that the whole array is never built on the stack. This significantly reduces the stack usage. Signed-off-by: Tim Kovalenko Acked-by: Alexandre Courbot Reported-by: Gary Guo --- drivers/gpu/nova-core/gsp.rs | 34 +++++++++++++++++++--------------- drivers/gpu/nova-core/gsp/cmdq.rs | 15 ++++++++++++++- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 25cd48514c777cb405a2af0acf57196b2e2e7837..20170e483e04c476efce8997b39= 16b0ad829ed38 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -47,16 +47,11 @@ unsafe impl AsBytes for PteArray {} =20 impl PteArray { - /// Creates a new page table array mapping `NUM_PAGES` GSP pages start= ing at address `start`. - fn new(start: DmaAddress) -> Result { - let mut ptes =3D [0u64; NUM_PAGES]; - for (i, pte) in ptes.iter_mut().enumerate() { - *pte =3D start - .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) - .ok_or(EOVERFLOW)?; - } - - Ok(Self(ptes)) + /// Returns the page table entry for `index`, for a mapping starting a= t `start` DmaAddress. + fn entry(start: DmaAddress, index: usize) -> Result { + start + .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW) } } =20 @@ -86,16 +81,25 @@ fn new(dev: &device::Device) -> Result { NUM_PAGES * GSP_PAGE_SIZE, GFP_KERNEL | __GFP_ZERO, )?); - let ptes =3D PteArray::::new(obj.0.dma_handle())?; + + let start_addr =3D obj.0.dma_handle(); =20 // SAFETY: `obj` has just been created and we are its sole user. - unsafe { - // Copy the self-mapping PTE at the expected location. + let pte_region =3D unsafe { obj.0 - .as_slice_mut(size_of::(), size_of_val(&ptes))? 
- .copy_from_slice(ptes.as_bytes()) + .as_slice_mut(size_of::(), NUM_PAGES * size_of::= ())? }; =20 + // This is a one by one GSP Page write to the memory + // to avoid stack overflow when allocating the whole array at once. + for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).en= umerate() { + let pte_value =3D start_addr + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW)?; + + chunk.copy_from_slice(&pte_value.to_ne_bytes()); + } + Ok(obj) } } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/= cmdq.rs index 0056bfbf0a44cfbc5a0ca08d069f881b877e1edc..c8327d3098f73f9b880eee99038= ad10a16e1e32d 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -202,7 +202,20 @@ fn new(dev: &device::Device) -> Result<= Self> { =20 let gsp_mem =3D CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNE= L | __GFP_ZERO)?; - dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())= ?); + + const NUM_PTES: usize =3D GSP_PAGE_SIZE / size_of::(); + + let start =3D gsp_mem.dma_handle(); + // One by one GSP Page write to the memory to avoid stack overflow= when allocating + // the whole array at once. + for i in 0..NUM_PTES { + dma_write!( + gsp_mem, + [0]?.ptes.0[i], + PteArray::::entry(start, i)? + ); + } + dma_write!( gsp_mem, [0]?.cpuq.tx, --=20 2.53.0