From nobody Thu Apr 2 20:21:30 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6F5D025487C; Wed, 18 Feb 2026 04:01:34 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771387294; cv=none; b=A4XV2/MWKZnNBGQCy18Te7vld4LPRLBy/hr8bhiOaW7bsPQJu81GBeBafnOU2gakXi25GFXNPOMC9KyQmB88OgWnWHRybx/7EGa1wOPdb4h1ilEDl60MKRfxCY3Ok0Vsfjcn7mez7KRuZ8lD+wqRnfYEqIGXwKv1ggapgpf3cws= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771387294; c=relaxed/simple; bh=ey3DQqyq3X7dJ4CqMjdaGFSlYYIHXHiGR0VQGzR3rW0=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:To:Cc; b=Jwp+P9wz+NSUrYNR/AHNzgMVJ6TQZY79itvdR6bLS5Xymk0jDmwJSpCCYPwb6l3bWEie7jBl+VOx7a/I+aVQlEX52yh2LnSYaPObYcASwD7BlQa5++M58D2M5Ytr65oEBzR1byodC/PomKAuHzIFC9cB55qVb+B09tKvbYC2+L8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=S4I1Y97u; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="S4I1Y97u" Received: by smtp.kernel.org (Postfix) with ESMTPS id 01261C19425; Wed, 18 Feb 2026 04:01:34 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1771387294; bh=ey3DQqyq3X7dJ4CqMjdaGFSlYYIHXHiGR0VQGzR3rW0=; h=From:Date:Subject:To:Cc:Reply-To:From; b=S4I1Y97uszchibP5IiAdK+WxiIusxPynwoWaIOy7vDxWvNiH+/EshbhLeBg1RP/we QzvXNVfN0vdqe7OQBdJQoiUW/ut8VBY54yOPI0x/k7oiCM3Zfvbpmegqk/G+lxz+sJ 9sXwjnrThV5R+EGX107/5s60KheZPKKrCwYUZBcaURUogUQdO4XBuzUtDPYsSTaFSe mwtkXFGzsGxxgxDnTRIWBUIl6EXhZG5XtFxLwSJ4WpiO91Sgl0s1KvsU1HzfqTRnnf 
PBQrZRIsFWSCM5ZrIXqj3TfPTTjjcIKpVJN2yXw+Xm7yt7VhvOTQwXlL6IyiLQYqUM 6UDw6ToBT9LSA== Received: from aws-us-west-2-korg-lkml-1.web.codeaurora.org (localhost.localdomain [127.0.0.1]) by smtp.lore.kernel.org (Postfix) with ESMTP id DDF54E9A03B; Wed, 18 Feb 2026 04:01:33 +0000 (UTC) From: Tim Kovalenko via B4 Relay Date: Tue, 17 Feb 2026 23:01:29 -0500 Subject: [PATCH v3] gpu: nova-core: fix stack overflow in GSP memory allocation Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260217-drm-rust-next-v3-1-9e7e95c597dc@proton.me> X-B4-Tracking: v=1; b=H4sIAAAAAAAC/3XMQQ6CMBCF4auQrq1pBwTqynsYF8UO0gUtmdYGQ 7i7hRUxcfle8v0LC0gWA7sWCyNMNljv8ihPBXsO2r2QW5M3AwG1AAnc0MjpHSJ3OEfeYadAIyr TXFg2E2Fv5713f+Q92BA9ffZ8ktv7r5Qkl7wSqlRtJwFreZvIR+/OI7KtlOCoy18NWWstVNU3b QNSH/W6rl/aVHVF6AAAAA== X-Change-ID: 20260212-drm-rust-next-beb92aee9d75 To: Alexandre Courbot , Danilo Krummrich , Alice Ryhl , David Airlie , Simona Vetter , Miguel Ojeda , Boqun Feng , Gary Guo , =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= , Benno Lossin , Andreas Hindborg , Trevor Gross Cc: nouveau@lists.freedesktop.org, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org, Tim Kovalenko X-Mailer: b4 0.14.2 X-Developer-Signature: v=1; a=ed25519-sha256; t=1771387293; l=6122; i=tim.kovalenko@proton.me; s=20260212; h=from:subject:message-id; bh=9d2Vc62rvOiPpAv0Hkn6Wiy01xq2m2s0v3zXT6ns3tQ=; b=XCWTBdsFpHQ/x01zkYW8tXHp3uoZqiNipbkmGPP5tl3V3birQttmkhUkU/SzQj6SQtRxxKDIJ UQItC1vjmCxAcX/TmTwm0CmI+0dMv+HZGHQ3UqAmaB+PmF5qyM+XeVJ X-Developer-Key: i=tim.kovalenko@proton.me; a=ed25519; pk=/+OiulEpgeZifgP4mDE4e5YlV6nMeY+frze/lY/xiHI= X-Endpoint-Received: by B4 Relay for tim.kovalenko@proton.me/20260212 with auth_id=635 X-Original-From: Tim Kovalenko Reply-To: tim.kovalenko@proton.me From: Tim Kovalenko The `Cmdq::new` function was 
allocating a `PteArray` struct on the stack, which caused a stack overflow with 8216 bytes. Modify the `PteArray` to calculate and write the Page Table Entries directly into the coherent DMA buffer one-by-one. This significantly reduces stack usage. Signed-off-by: Tim Kovalenko --- Changes in v3: - Addressed the comments and reinstated the PteArray type. - PteArray now uses `init` instead of `new`, writing to `self` page by page. - PteArray just needs a PTE pointer obtained from the `gsp_mem.as_slice_mut= `. I hope I understood everything in the v2 email chain and implemented it cor= rectly :) - Link to v2: https://lore.kernel.org/r/20260213-drm-rust-next-v2-1-aa094f7= 8721a@proton.me Changes in v2: - Fixed a code formatting issue that was missed in v1. - Link to v1: https://lore.kernel.org/r/20260212-drm-rust-next-v1-1-409398b= 12e61@proton.me --- drivers/gpu/nova-core/gsp.rs | 34 +++++++++++++++++++++++----------- drivers/gpu/nova-core/gsp/cmdq.rs | 20 +++++++++++++++----- 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 174feaca0a6b9269cf35286dec3acc4d60918904..7dc67fd55ce6ce19cbb750961dc= fb4e373a20b4c 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -2,6 +2,8 @@ =20 mod boot; =20 +use core::iter::Iterator; + use kernel::{ device, dma::{ @@ -30,7 +32,7 @@ GspArgumentsPadded, LibosMemoryRegionInitArgument, // }, - num, + num, // }; =20 pub(crate) const GSP_PAGE_SHIFT: usize =3D 12; @@ -47,16 +49,17 @@ unsafe impl AsBytes for PteArray {} =20 impl PteArray { - /// Creates a new page table array mapping `NUM_PAGES` GSP pages start= ing at address `start`. - fn new(start: DmaAddress) -> Result { - let mut ptes =3D [0u64; NUM_PAGES]; - for (i, pte) in ptes.iter_mut().enumerate() { + /// Initializes the page table array mapping `NUM_PAGES` GSP pages sta= rting at address `start`. 
+ /// This is done "in-memory" without using the stack to avoid overflow= , by writing one page at + /// a time to the memory region + fn init(&mut self, start: DmaAddress) -> Result { + for (i, pte) in self.0.iter_mut().enumerate() { *pte =3D start .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) .ok_or(EOVERFLOW)?; } =20 - Ok(Self(ptes)) + Ok(()) } } =20 @@ -86,16 +89,25 @@ fn new(dev: &device::Device) -> Result { NUM_PAGES * GSP_PAGE_SIZE, GFP_KERNEL | __GFP_ZERO, )?); - let ptes =3D PteArray::::new(obj.0.dma_handle())?; + + let start_addr =3D obj.0.dma_handle(); =20 // SAFETY: `obj` has just been created and we are its sole user. - unsafe { - // Copy the self-mapping PTE at the expected location. + let pte_region =3D unsafe { obj.0 - .as_slice_mut(size_of::(), size_of_val(&ptes))? - .copy_from_slice(ptes.as_bytes()) + .as_slice_mut(size_of::(), NUM_PAGES * size_of::= ())? }; =20 + // As in [`PteArray::init`], this is a one by one GSP Page write = to the memory + // to avoid stack overflow when allocating the whole array at once. + for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).en= umerate() { + let pte_value =3D start_addr + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW)?; + + chunk.copy_from_slice(&pte_value.to_ne_bytes()); + } + Ok(obj) } } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/= cmdq.rs index 46819a82a51adc58423502d9d45730923b843656..132342a1a6d843e999e2d0e4fbc= c76bde2bd8652 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -23,7 +23,7 @@ transmute::{ AsBytes, FromBytes, // - }, + }, // }; =20 use crate::{ @@ -34,10 +34,10 @@ MsgFunction, MsgqRxHeader, MsgqTxHeader, // - }, + }, // PteArray, GSP_PAGE_SHIFT, - GSP_PAGE_SIZE, // + GSP_PAGE_SIZE, }, num, regs, @@ -159,6 +159,7 @@ struct Msgq { #[repr(C)] struct GspMem { /// Self-mapping page table entries. 
+ // ptes: [u64; GSP_PAGE_SIZE / size_of::()], ptes: PteArray<{ GSP_PAGE_SIZE / size_of::() }>, /// CPU queue: the driver writes commands here, and the GSP reads them= . It also contains the /// write and read pointers that the CPU updates. @@ -199,9 +200,18 @@ fn new(dev: &device::Device) -> Result<= Self> { const MSGQ_SIZE: u32 =3D num::usize_into_u32::<{ size_of::()= }>(); const RX_HDR_OFF: u32 =3D num::usize_into_u32::<{ mem::offset_of!(= Msgq, rx) }>(); =20 - let gsp_mem =3D + let mut gsp_mem =3D CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNE= L | __GFP_ZERO)?; - dma_write!(gsp_mem[0].ptes =3D PteArray::new(gsp_mem.dma_handle())= ?)?; + + let start_address =3D gsp_mem.dma_handle(); + + // SAFETY: `gsp_mem` has just been created and we are its sole use= r. + let mem: &mut [GspMem] =3D unsafe { gsp_mem.as_slice_mut(0, 1)? }; + + // Borrowing the array from gsp_mem and writing directly to that i= n the init method of + // PteArray + mem[0].ptes.init(start_address)?; + dma_write!(gsp_mem[0].cpuq.tx =3D MsgqTxHeader::new(MSGQ_SIZE, RX_= HDR_OFF, MSGQ_NUM_PAGES))?; dma_write!(gsp_mem[0].cpuq.rx =3D MsgqRxHeader::new())?; =20 --- base-commit: cea7b66a80412e2a5b74627b89ae25f1d0110a4b change-id: 20260212-drm-rust-next-beb92aee9d75 Best regards, --=20 Tim Kovalenko