From nobody Thu Apr 2 22:24:27 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A845C213E89; Fri, 13 Feb 2026 19:41:30 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771011690; cv=none; b=dzrJs96FLO64nqKRGZINEqES+HHd34SV5DjJL5cYh/mScO6oQSR8+FyKNHKZ7pXHvDjvzBdKY6GtLhJYgOkSgQ/xXfxDSuAxC8qALVhCmpy6aoyvEl70a9SssPIF84f3V8ufPPPKcdIzvGEDOggxW/OsPjAk/Hupyb52Xm7R2YI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771011690; c=relaxed/simple; bh=npawl3Sh8CB9l+1ZY62uo+ipDHKFXtD/V7TZlj5J+kE=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:To:Cc; b=MHQUfQTjAzlKLTmozL4hLD2dFq2zyoLpm8PWn1UaFwoMjOBI6Ud5vRjZljW9kWi8X4NRBCjxjc2agmbj8qvz7Y+7T6hfdEUHpEBGSpisRHpsqbASS/Rnm/O/SW8TAHsrdJqBvLj5XR5lzHa2M4gv18XEnkxHEutd2YQAcGR6bHA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=OfC5wtKz; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="OfC5wtKz" Received: by smtp.kernel.org (Postfix) with ESMTPS id 284E1C19424; Fri, 13 Feb 2026 19:41:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1771011690; bh=npawl3Sh8CB9l+1ZY62uo+ipDHKFXtD/V7TZlj5J+kE=; h=From:Date:Subject:To:Cc:Reply-To:From; b=OfC5wtKzDjjRTMyYDLfYtHcRXqV6U0vmPI2AWkLm7EB7Z/T1v0l2abeRgzhIjaq58 Z6mXUTghHGpMEJjGZqbxnUxJF/dISX+OblcR09PaWXsct7GP+lZ/WGhM61zFVZyZvJ ezgUSifRI4TocQxH7uuzPA29NB6qSsXL9rjlOscWyTxleT5wdzhWwW3O5otlF5rb9J 2YKoA3hcYhfFQdQo+4Bm0KBsyTKSD2wNZYHFxA9+XYiVpAe156iAJxrwUb8AvWdFWx 
c4/XK2w6SNRQtN0F3WrKjQ3Wh9Vj3BkIafCdUHHajvPr+aXIG8LGqNTYuBj87TzIiK BIkMyutGBksBA== Received: from aws-us-west-2-korg-lkml-1.web.codeaurora.org (localhost.localdomain [127.0.0.1]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0A4CEEF99D7; Fri, 13 Feb 2026 19:41:30 +0000 (UTC) From: Tim Kovalenko via B4 Relay Date: Fri, 13 Feb 2026 14:40:13 -0500 Subject: [PATCH v2] gpu: nova-core: fix stack overflow in GSP memory allocation Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260213-drm-rust-next-v2-1-aa094f78721a@proton.me> X-B4-Tracking: v=1; b=H4sIAAAAAAAC/3XMQQ7CIBCF4as0s3YMjFrFlfcwXRQZLYtCM2BT0 3B3sXuX/0vet0Ji8Zzg2qwgPPvkY6hBuwYeQx9ejN7VBlLUKtKETkaUd8oYeMlo2RrqmY07n6B +JuGnXzbv3tUefMpRPhs/69/6T5o1ajwqczAXq4lbfZsk5hj2I0NXSvkCJpNJDqoAAAA= X-Change-ID: 20260212-drm-rust-next-beb92aee9d75 To: Alexandre Courbot , Danilo Krummrich , Alice Ryhl , David Airlie , Simona Vetter , Miguel Ojeda , Boqun Feng , Gary Guo , =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= , Benno Lossin , Andreas Hindborg , Trevor Gross Cc: nouveau@lists.freedesktop.org, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org, Tim Kovalenko X-Mailer: b4 0.14.2 X-Developer-Signature: v=1; a=ed25519-sha256; t=1771011689; l=6183; i=tim.kovalenko@proton.me; s=20260212; h=from:subject:message-id; bh=TbEzlnu4V85B1wrxC0sGqgddAbgVIVV2UDPzTnjjCng=; b=1765X0vwFeNKmn7aEyMFKc6c5HTP2IxuQE8XcwSTcwhN9KrHAJ9dpgfoy/Ysb4YaVm3UM3IJh JbUXKzKcDEtDsz7mFSr+uHD5WK5p5zrFsDtx/d7Af6EsHRCj0EHcXNO X-Developer-Key: i=tim.kovalenko@proton.me; a=ed25519; pk=/+OiulEpgeZifgP4mDE4e5YlV6nMeY+frze/lY/xiHI= X-Endpoint-Received: by B4 Relay for tim.kovalenko@proton.me/20260212 with auth_id=635 X-Original-From: Tim Kovalenko Reply-To: tim.kovalenko@proton.me From: Tim Kovalenko The `Cmdq::new` function was allocating a `PteArray` struct on 
the stack, which caused a stack overflow with 8216 bytes. Remove the `PteArray` and instead calculate and write the Page Table Entries directly into the coherent DMA buffer one-by-one. This substantially reduces the stack usage. Signed-off-by: Tim Kovalenko --- Changes in v2: - Fixed a code formatting issue that was missed in v1. - Link to v1: https://lore.kernel.org/r/20260212-drm-rust-next-v1-1-409398b= 12e61@proton.me --- drivers/gpu/nova-core/gsp.rs | 50 ++++++++++++++---------------------= ---- drivers/gpu/nova-core/gsp/cmdq.rs | 27 ++++++++++++++++++--- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 174feaca0a6b9269cf35286dec3acc4d60918904..316eeaf87ec5ae67422a34426ee= fa747c9b6502b 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -2,16 +2,14 @@ =20 mod boot; =20 +use core::iter::Iterator; + use kernel::{ device, - dma::{ - CoherentAllocation, - DmaAddress, // - }, + dma::CoherentAllocation, dma_write, pci, - prelude::*, - transmute::AsBytes, // + prelude::*, // }; =20 pub(crate) mod cmdq; @@ -39,27 +37,6 @@ /// Number of GSP pages to use in a RM log buffer. const RM_LOG_BUFFER_NUM_PAGES: usize =3D 0x10; =20 -/// Array of page table entries, as understood by the GSP bootloader. -#[repr(C)] -struct PteArray([u64; NUM_ENTRIES]); - -/// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper a= round one. -unsafe impl AsBytes for PteArray {} - -impl PteArray { - /// Creates a new page table array mapping `NUM_PAGES` GSP pages start= ing at address `start`. - fn new(start: DmaAddress) -> Result { - let mut ptes =3D [0u64; NUM_PAGES]; - for (i, pte) in ptes.iter_mut().enumerate() { - *pte =3D start - .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) - .ok_or(EOVERFLOW)?; - } - - Ok(Self(ptes)) - } -} - /// The logging buffers are byte queues that contain encoded printf-like /// messages from GSP-RM. 
They need to be decoded by a special application /// that can parse the buffers. @@ -86,16 +63,25 @@ fn new(dev: &device::Device) -> Result { NUM_PAGES * GSP_PAGE_SIZE, GFP_KERNEL | __GFP_ZERO, )?); - let ptes =3D PteArray::::new(obj.0.dma_handle())?; + + let start_addr =3D obj.0.dma_handle(); =20 // SAFETY: `obj` has just been created and we are its sole user. - unsafe { - // Copy the self-mapping PTE at the expected location. + let pte_region =3D unsafe { obj.0 - .as_slice_mut(size_of::(), size_of_val(&ptes))? - .copy_from_slice(ptes.as_bytes()) + .as_slice_mut(size_of::(), NUM_PAGES * size_of::= ())? }; =20 + // As in [`DmaGspMem`], this is a one by one GSP Page write to th= e memory + // to avoid stack overflow when allocating the whole array at once. + for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).en= umerate() { + let pte_value =3D start_addr + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW)?; + + chunk.copy_from_slice(&pte_value.to_ne_bytes()); + } + Ok(obj) } } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/= cmdq.rs index 46819a82a51adc58423502d9d45730923b843656..7a6cb261f4e62ac6210a80f9ecb= 61213cdb91b15 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -35,7 +35,6 @@ MsgqRxHeader, MsgqTxHeader, // }, - PteArray, GSP_PAGE_SHIFT, GSP_PAGE_SIZE, // }, @@ -159,7 +158,7 @@ struct Msgq { #[repr(C)] struct GspMem { /// Self-mapping page table entries. - ptes: PteArray<{ GSP_PAGE_SIZE / size_of::() }>, + ptes: [u64; GSP_PAGE_SIZE / size_of::()], /// CPU queue: the driver writes commands here, and the GSP reads them= . It also contains the /// write and read pointers that the CPU updates. 
/// @@ -201,7 +200,29 @@ fn new(dev: &device::Device) -> Result<= Self> { =20 let gsp_mem =3D CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNE= L | __GFP_ZERO)?; - dma_write!(gsp_mem[0].ptes =3D PteArray::new(gsp_mem.dma_handle())= ?)?; + const NUM_PAGES: usize =3D GSP_PAGE_SIZE / size_of::(); + + // One by one GSP Page write to the memory to avoid stack overflow= when allocating + // the whole array at once. + let item =3D gsp_mem.item_from_index(0)?; + for i in 0..NUM_PAGES { + let pte_value =3D gsp_mem + .dma_handle() + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) + .ok_or(EOVERFLOW)?; + + // SAFETY: `item_from_index` ensures that `item` is always a v= alid pointer and can be + // dereferenced. The compiler also further validates the expre= ssion on whether `field` + // is a member of `item` when expanded by the macro. + // + // Further, this is dma_write! macro expanded and modified to = allow for individual + // page write. + unsafe { + let ptr_field =3D core::ptr::addr_of_mut!((*item).ptes[i]); + gsp_mem.field_write(ptr_field, pte_value); + } + } + dma_write!(gsp_mem[0].cpuq.tx =3D MsgqTxHeader::new(MSGQ_SIZE, RX_= HDR_OFF, MSGQ_NUM_PAGES))?; dma_write!(gsp_mem[0].cpuq.rx =3D MsgqRxHeader::new())?; =20 --- base-commit: cea7b66a80412e2a5b74627b89ae25f1d0110a4b change-id: 20260212-drm-rust-next-beb92aee9d75 Best regards, --=20 Tim Kovalenko