From nobody Mon Jun 8 12:12:36 2026 Received: from CO1PR03CU002.outbound.protection.outlook.com (mail-westus2azon11010013.outbound.protection.outlook.com [52.101.46.13]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1466A347FE1; Fri, 29 May 2026 07:34:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=fail smtp.client-ip=52.101.46.13 ARC-Seal: i=2; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1780040054; cv=fail; b=AmD9KjGam5BXOouOapQNq6Ew6vOgrgHOB6XaEntsffWXIi4+slBC6HRxCBnTGmmjT7w2nRXXLHPU9b08tQ6ocyFef+aFI1SmHhuTqZwI+6nZ3jUgVsHUyn0TAkvtPb9FBkErccW/W4mnubzQ1Ws1Cg0sHMbs6T70RlzwR/jyAvg= ARC-Message-Signature: i=2; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1780040054; c=relaxed/simple; bh=k+F3H7U8d0PYTjmEDKUBPg7Q8RJmFkFfh0D5RIiB8gU=; h=From:Date:Subject:Content-Type:Message-Id:References:In-Reply-To: To:Cc:MIME-Version; b=TYb36WVsuJ+UnWs0IlBcAnbxFv0SWHM6ZNbDdVZhTiZLNt1LHBhfcVP+NxNEuXZ0SMw2dC+ey8fyHl8H2+VoBDkURE3ah40+9Imwxn8g3WG2oWZqC0FCRSBaIMO6wc3CKxNgP+a8pVrX5lU1W6CvjCoaaRR7CVhRhzhVSwh4Rqg= ARC-Authentication-Results: i=2; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=nvidia.com; spf=fail smtp.mailfrom=nvidia.com; dkim=pass (2048-bit key) header.d=Nvidia.com header.i=@Nvidia.com header.b=iePAs5Td; arc=fail smtp.client-ip=52.101.46.13 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=nvidia.com Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=nvidia.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=Nvidia.com header.i=@Nvidia.com header.b="iePAs5Td" ARC-Seal: i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none; b=jDqIbH7Fz0HVeBlh+7XNT+x/ssHLQjW2+jHAqoK/UKlQ/CVpfIbTEvCIc7x4o65dO8/jb6A9Ra6PrFA1i5IbVlRI2/sir9EVENGo7O5vHiLq3imvOSPHNLu1rjTkGCr5f2tVYqcjwAydfu+1/IXkloVIn1BRF1Qy8InqpQ3aN+StktFZ3qIO9XG5Ib9uVJY+25sYnTAIe6jWIFVusqS6jdRqAY0c9POEvSpdc1e5uKe0aUnnqPWhORRIYWOmJ03BL856evzrCYTITdX4xzB6JqERsjY5zC2VaX80mQtDU7N9KQrszSLq+XeH16ObSQm2ORNfLr50Jwyip3UGRNYo8w== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector10001; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=Legy9iHXCRpYvhmbLaoMds/W31X8HP1lt2LSyQGOlAA=; b=uemWeQQWC6oHhNGl6e1hfF9xo42GKnyQjfjnZI05m3ODGvHWraMePCIUC85MGyEBTgRgW5nmCTR8zs1mOJ9y46L4GKfgat0Dr6lfcHOFatU6xG77wUzRTbOFS/id4I2ffagAzH53H7Ga9J9f0kCiuykQh7coWtA71UiAme1k6OFDThewFGl4LeCeg2sNJKMaJiw2WWcdvZYh5Z315mCO+bED9Do2GYi+sTIR2Awx0nE9geJyg/cmAYdWJNgnOYjbGcFn4HPTMvCulmpvi2v2bOuvp7aSfVcExew4yn5tTx8VI/uFUF5LCjL3gjTndJ6ZRzD84se7C31dsf/Dr5EZcw== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=nvidia.com; dmarc=pass action=none header.from=nvidia.com; dkim=pass header.d=nvidia.com; arc=none DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=Nvidia.com; s=selector2; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=Legy9iHXCRpYvhmbLaoMds/W31X8HP1lt2LSyQGOlAA=; b=iePAs5Td0/V8ZmvKIW+PS8m6LGJ+yDtW+v4ZPOd27Vk+yNNH0uycJ10eBrJC91aNbbUzZ9DErNrj+n274AnZyIUCo0cLvfCdZ/iMaK+FfC9A6DYeyLUTpTPBEPavX8II88Eeq3+ILJDvtUzPbzCy52MFR97BXXcXCKcBGr/6t6JkgtaCpllaOzTI9LNvS1LptTlUOti7AYEoLt27ZOYLADtgddd2fwUKV2XhdqyLX6Fk9NLzKOMF0gchQvnmZZGEdvGWPlOtBmTtgIGy/ob6y6MbEqLraWwJXqFC6GByy9v5AnKNNIQxHDIkkOWsEJBgGfB3PDmnEEpCxeZuf4EDDA== Authentication-Results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=nvidia.com; Received: from CH2PR12MB3990.namprd12.prod.outlook.com (2603:10b6:610:28::18) by MN2PR12MB4237.namprd12.prod.outlook.com (2603:10b6:208:1d6::7) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.21.48.19; Fri, 29 May 2026 07:34:04 +0000 Received: from CH2PR12MB3990.namprd12.prod.outlook.com ([fe80::7de1:4fe5:8ead:5989]) by CH2PR12MB3990.namprd12.prod.outlook.com ([fe80::7de1:4fe5:8ead:5989%4]) with mapi id 15.21.0071.010; Fri, 29 May 2026 07:34:04 +0000 From: Alexandre Courbot Date: Fri, 29 May 2026 16:33:41 +0900 Subject: [PATCH v7 1/4] gpu: nova-core: gsp: move chipset-specific parts of the boot process into a HAL Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260529-nova-unload-v7-1-678f39209e00@nvidia.com> References: <20260529-nova-unload-v7-0-678f39209e00@nvidia.com> In-Reply-To: <20260529-nova-unload-v7-0-678f39209e00@nvidia.com> To: Danilo Krummrich , Alice Ryhl , David Airlie , Simona Vetter Cc: John Hubbard , Alistair Popple , Timur Tabi , Eliot Courtney , nova-gpu@lists.linux.dev, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org, Alexandre Courbot X-Mailer: b4 0.15.2 X-ClientProxiedBy: TYCP301CA0001.JPNP301.PROD.OUTLOOK.COM (2603:1096:400:386::19) To CH2PR12MB3990.namprd12.prod.outlook.com (2603:10b6:610:28::18) Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-MS-PublicTrafficType: Email X-MS-TrafficTypeDiagnostic: CH2PR12MB3990:EE_|MN2PR12MB4237:EE_ X-MS-Office365-Filtering-Correlation-Id: 91cfff18-221e-42e0-26c5-08debd54a8f5 X-MS-Exchange-SenderADCheck: 1 X-MS-Exchange-AntiSpam-Relay: 0 X-Microsoft-Antispam: BCL:0;ARA:13230040|366016|1800799024|376014|10070799003|18002099003|22082099003|11063799006|56012099006|3023799007|6133799003; X-Microsoft-Antispam-Message-Info: RYCHXEAssoeXttCYyU55PAu6LQMJTnySlUXDwxbeUA5IfYKEN5gH9ndnEBwnTXU3ZPXM2pC4UFbJ8CedknVDU5LaFJJMsoM0T0e2MkTXxik+Bsiu0/l7OFyYRvAGrAV2t5wgTeXuOSGaUiByWPpctm/ucLjHkAy0pVheDkrM2x+VqzcpJ9dXbAOnDFqkiAsptO0E4vYhSok6JbPrPosMsbQ+1iItopPMarVJB3ykmIsMEio4o3nTOJ2FBMZFOuMNMjZ3CljUwAfl+MbZt3m3M8xywOQ2krUdOdLwFUivunTTiXKi+rShtW4brzmspRTtvI1bGvnFyUb9bXE/d5ARWe/GsLgrHXUGf/avQRUXCaZNRJZVdfMdAHTjl6vzLCtCRRZjwu07ql7IVc2FHPiNwM0QQMzQ9X0DhvL8WS+nVdSSsb5waFDF3LEuh9w4Pio1R8t248ghUusJbr/Ri/fuUPTjNMr5Fp/HK8StkZSaLxTPhECXV/kzFXSDjtrBcN83Ko8WprRun7HyumIehYwHT5Pep7/OPV7Op076yo+EKoYcFOhnSPAXzC30MXrfiiNTOWE4RL0gHRvX3Zo3jIX1ZZNyQwOtiwz1cMSQBlnfdlGuLZPrIg6AU+ZaqVZuR6J3C3rJfC30eHNpy4sNpkl514j78W3e62d+eza6y7p3gTwXoikleirB1mS+J7mVKIQi X-Forefront-Antispam-Report: CIP:255.255.255.255;CTRY:;LANG:en;SCL:1;SRV:;IPV:NLI;SFV:NSPM;H:CH2PR12MB3990.namprd12.prod.outlook.com;PTR:;CAT:NONE;SFS:(13230040)(366016)(1800799024)(376014)(10070799003)(18002099003)(22082099003)(11063799006)(56012099006)(3023799007)(6133799003);DIR:OUT;SFP:1101; X-MS-Exchange-AntiSpam-MessageData-ChunkCount: 2 X-MS-Exchange-AntiSpam-MessageData-0: =?utf-8?B?WlFzYjhGQzkvWEJ5QU5VM3JIYTRoMlRGcjI1bUlYUlE1SXUwSitTajFLQXc1?= =?utf-8?B?a3RKSllqU21wVDBTcStaTXBLM2NlRHBFeVcrM2NjcVYra2cwT2NDamJYYmpM?= =?utf-8?B?MzNBNHpkdm1uRXcyVUF0bko2UU90ZVh6a1VlcU1HLzRzM2FZSFk2Qmgwa2Mz?= =?utf-8?B?R3JNUTAzNGtoN1BtUUxJakFzdEJrRG9EK3FVaUIybVpUOVU2T251aGpyVUpB?= =?utf-8?B?b3hTZ3VBOStJYlZ3NFl1aWxicm5MQmJ3WGduQk9ITW9CSHFQdktheVFyeDNS?= =?utf-8?B?Y1pVZWdXVWlTU3M3RmJINXVEOFQ5TUROekRvRGNmVnU4Tk9DbktYejFCRnlR?= =?utf-8?B?TWpFdEQzN0ZEdnZvZ1d3RWdZYS9JaVVqTE1oSzZtR0d1SzVlemJTN2RRbVVx?= =?utf-8?B?RHQ2RnVDcXY0KytBYUlUTkxMYWpIMllwU0xRc3VhOGRtRWZZR3hhU08xZjRt?= =?utf-8?B?aWNBclFhV3B3b21nREdiTUxxalQ4WlhRTW1CbVl4UnF6blNXMHQ2dnNoMUh1?= =?utf-8?B?bWI4M28zU2FRQlVqbVVpVnh0UG5rOVRUbGFBMnEwUVFsS25XS0piUVB6Y0pW?= =?utf-8?B?cVo3QWJnZEJSQjZNRUVjKzhBU1daN2MvSmhIa3JoQXBLdGl5b2F4aFVLN3Bh?= =?utf-8?B?eFpwdHNrZy9Sak9oQlM1cEJ4a2dCemw1WmxPVHFiZVpvRmFiYXdhbzIyZVU0?= =?utf-8?B?eFpMZVZwS1lFZjZXVmg3RHlXdEV3SVptUHVTQzBJM1VrUTlYaUlRaURhUE9t?= =?utf-8?B?L2tnL0E1UlA3M09xOC9CRFphQUZ3ek9YcmFZSmNaTU1jVDc1QmJBZ2UzNXJa?= =?utf-8?B?U3V6ajhpeE1iWE94VzcwRzh3bzcyRFNWVTBEbEJZYjJ0QUlVS2VLa1MwdnI5?= =?utf-8?B?Q0NscUZGa3dnajBVRHRqcEpOTTVKY3JURDhwWVFyWnY0OWR4MGlsZUFCWEI4?= =?utf-8?B?MGQ0NUF6anB3RkNaUmJ2UnlqTnRJTWFDMUhERVN3WCtqekNaalZ2SVp4U0JH?= =?utf-8?B?VFJvVENYaGZ2Qm9jWUtyLzdvSkhncWRSME9iSkpKTnkvbDdLUTFpQUZXd3Yv?= =?utf-8?B?MEIzcU05SjlNbGJRSjZKVmNTVWlqR2Q1Y2ZkY2RkUlBTNmErWmJ2eHN6QzZ1?= =?utf-8?B?anlUVUwwd0dQeFFxQ01aeVZWWWtFQlpMTCs3ZzY0MzFMUHZSZmpBN0RLR0Zt?= =?utf-8?B?SUJESURXTnJZLzdTUU5ocGUxSlZreDVzMlJBZG1uekhacmRIQ041eGF0VGt4?= =?utf-8?B?MHl4cFFVVllZV3JEaHJHS3RCNTJ0MGdyeVF0ODhRZitVMTBBWjE5aFcwUlRJ?= =?utf-8?B?eEhPTUcwUUpJRVVNN1BGRVdNVWJvT1Y1dHJuaUQ2OHFac2hlcVE0RVJrZGJ6?= =?utf-8?B?dnM2OGFBNEt5MVcvYWM0bU1ia1IvZmU5QkVDV3Jtb3F3TEdiWWRJY1BZOHFm?= =?utf-8?B?UXpRdmhMc09ySi9WMG1nNTlnb3I2aEhzdGljZFhvWjFYK2ErbFJaSkNEMXNG?= =?utf-8?B?c1l1b0tqajZzSEw5UFczZW91UTlpcjNGOGloRW1SdFd4U1pkdlQzLzF5akh6?= =?utf-8?B?d0dmNkZHYU9ycUFqSlZqdUZNUmJCMFE0YTBEN3FlWnIrenFzdW16NUJiRDVW?= =?utf-8?B?T28wZ084YlMrVlN4SHVlRW0xK09HTnk2VVNmeE9vV2w2cUFCMHIrWnRYUUxD?= =?utf-8?B?UEUzcFRaelhDbTNUWFRwNGpVckt5NlhSWVZ0UnpJVEUzcTJUY0pzcytSajlM?= =?utf-8?B?anNXUDZic25uM1VTYnE1R0hHUko2bS9lcE1WVDVMamdQbm9pSkdUcHBkZWdU?= =?utf-8?B?WTI3QStFOG9BK1VEaXNTMEkydTRBN284RXJUVndwVDJYOXhGQ1QzS215YktI?= =?utf-8?B?U2hPR25WV2tIV2gyL0JLUFE5a1hGWGNDMXRyODgrWERXSWFXV0Q0OXFOSDNz?= =?utf-8?B?SHJ5M3MxUVAxNEYzYkJVV2RCYmRzbzlqSHpnWnExUUhkWUhld1Nmc0JwRytu?= =?utf-8?B?VTBpR1RDN2RNZ1J4KzR2WDBDMmtQTzkwZmNPSURNZDZCVHBCUWVSeUp2aFJI?= =?utf-8?B?L3RiQzVPcDZkQnY4Z2V4eVU3YjVRMkQrVDhBU01vVXN3MG5QcUVVdDV3Qk1s?= =?utf-8?B?L0V1UUlaV0hzVWZobTBJdG1vK295NE85SzZQVW1aWGhFVlVDMDVtRm5jelFT?= =?utf-8?B?WkZSbytibzl6dGl5S1pVZ0g2ZnlPUmw4QW1YYUVyeVdYblhWbkUzcnh0TjZN?= =?utf-8?B?T3l6aGlQOTJlek1xejI2Q2ZhL2VHdmRMNE9mOGQwUDIzMHlHOUFXR0JkRi81?= =?utf-8?B?c2FaMjBDeVNHYkQwOHlyYnBvbnJrZVQ5Y25PUTZmaWZtaDhyMVJtQ0hJS2oz?= =?utf-8?Q?stZqPviUhRlv6igSCLk8iKSp8OCA432YVXC5Ah3OmaeyL?= X-MS-Exchange-AntiSpam-MessageData-1: V5nXxT8u6axuvw== X-OriginatorOrg: Nvidia.com X-MS-Exchange-CrossTenant-Network-Message-Id: 91cfff18-221e-42e0-26c5-08debd54a8f5 X-MS-Exchange-CrossTenant-AuthSource: CH2PR12MB3990.namprd12.prod.outlook.com X-MS-Exchange-CrossTenant-AuthAs: Internal X-MS-Exchange-CrossTenant-OriginalArrivalTime: 29 May 2026 07:34:04.4529 (UTC) X-MS-Exchange-CrossTenant-FromEntityHeader: Hosted X-MS-Exchange-CrossTenant-Id: 43083d15-7273-40c1-b7db-39efd9ccc17a X-MS-Exchange-CrossTenant-MailboxType: HOSTED X-MS-Exchange-CrossTenant-UserPrincipalName: 8Bg+Kk4gerPfqlIKdod7+bO+VXJgR4WMdXWdogxdRGo4B7AJvUJsN4OT6uGELCzHnwF0v/cJuB704s1mupuFPA== X-MS-Exchange-Transport-CrossTenantHeadersStamped: MN2PR12MB4237 Booting the GSP is done differently depending on the architecture. Move the parts that are chipset-specific under a HAL. This does not change much at the moment, since the differences between Turing and Ampere are rather benign, but will become critical to properly support the FSP boot process used by Hopper and Blackwell. The Hopper/Blackwell support is not merged yet, so their HAL is a stub for now. This patch is intended to be a mechanical code extraction with no behavioral changes. Reviewed-by: Eliot Courtney Signed-off-by: Alexandre Courbot Reviewed-by: Danilo Krummrich --- drivers/gpu/nova-core/gsp.rs | 1 + drivers/gpu/nova-core/gsp/boot.rs | 166 +++----------------------- drivers/gpu/nova-core/gsp/hal.rs | 74 ++++++++++++ drivers/gpu/nova-core/gsp/hal/gh100.rs | 50 ++++++++ drivers/gpu/nova-core/gsp/hal/tu102.rs | 206 +++++++++++++++++++++++++++++= ++++ 5 files changed, 344 insertions(+), 153 deletions(-) diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index ba5b7f990031..38378f104068 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 =20 mod boot; +mod hal; =20 use kernel::{ debugfs, diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/= boot.rs index 259f7c4d94f5..1bd9f21fc443 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -4,7 +4,6 @@ device, dma::Coherent, io::poll::read_poll_timeout, - io::Io, pci, prelude::*, time::Delta, // @@ -19,121 +18,17 @@ }, fb::FbLayout, firmware::{ - booter::{ - BooterFirmware, - BooterKind, // - }, - fwsec::{ - bootloader::FwsecFirmwareWithBl, - FwsecCommand, - FwsecFirmware, // - }, gsp::GspFirmware, FIRMWARE_VERSION, // }, - gpu::{ - Architecture, - Chipset, // - }, + gpu::Chipset, gsp::{ commands, - sequencer::{ - GspSequencer, - GspSequencerParams, // - }, GspFwWprMeta, // }, - regs, - vbios::Vbios, }; =20 impl super::Gsp { - /// Helper function to load and run the FWSEC-FRTS firmware and confir= m that it has properly - /// created the WPR2 region. - fn run_fwsec_frts( - dev: &device::Device, - chipset: Chipset, - falcon: &Falcon, - bar: &Bar0, - bios: &Vbios, - fb_layout: &FbLayout, - ) -> Result<()> { - // Check that the WPR2 region does not already exists - if it does= , we cannot run - // FWSEC-FRTS until the GPU is reset. - if bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound() !=3D= 0 { - dev_err!( - dev, - "WPR2 region already exists - GPU needs to be reset to pro= ceed\n" - ); - return Err(EBUSY); - } - - // FWSEC-FRTS will create the WPR2 region. - let fwsec_frts =3D FwsecFirmware::new( - dev, - falcon, - bar, - bios, - FwsecCommand::Frts { - frts_addr: fb_layout.frts.start, - frts_size: fb_layout.frts.len(), - }, - )?; - - if chipset.needs_fwsec_bootloader() { - let fwsec_frts_bl =3D FwsecFirmwareWithBl::new(fwsec_frts, dev= , chipset)?; - // Load and run the bootloader, which will load FWSEC-FRTS and= run it. - fwsec_frts_bl.run(dev, falcon, bar)?; - } else { - // Load and run FWSEC-FRTS directly. - fwsec_frts.run(dev, falcon, bar)?; - } - - // SCRATCH_E contains the error code for FWSEC-FRTS. - let frts_status =3D bar - .read(regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR) - .frts_err_code(); - if frts_status !=3D 0 { - dev_err!( - dev, - "FWSEC-FRTS returned with error code {:#x}\n", - frts_status - ); - - return Err(EIO); - } - - // Check that the WPR2 region has been created as we requested. - let (wpr2_lo, wpr2_hi) =3D ( - bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO).lower_bound(), - bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound(), - ); - - match (wpr2_lo, wpr2_hi) { - (_, 0) =3D> { - dev_err!(dev, "WPR2 region not created after running FWSEC= -FRTS\n"); - - Err(EIO) - } - (wpr2_lo, _) if wpr2_lo !=3D fb_layout.frts.start =3D> { - dev_err!( - dev, - "WPR2 region created at unexpected address {:#x}; expe= cted {:#x}\n", - wpr2_lo, - fb_layout.frts.start, - ); - - Err(EIO) - } - (wpr2_lo, wpr2_hi) =3D> { - dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); - dev_dbg!(dev, "GPU instance built\n"); - - Ok(()) - } - } - } - /// Attempt to boot the GSP. /// /// This is a GPU-dependent and complex procedure that involves loadin= g firmware files from @@ -149,17 +44,8 @@ pub(crate) fn boot( gsp_falcon: &Falcon, sec2_falcon: &Falcon, ) -> Result { - // The FSP boot process of Hopper+ is not supported for now. - if matches!( - chipset.arch(), - Architecture::Hopper | Architecture::BlackwellGB10x | Architec= ture::BlackwellGB20x - ) { - return Err(ENOTSUPP); - } - let dev =3D pdev.as_ref(); - - let bios =3D Vbios::new(dev, bar)?; + let hal =3D super::hal::gsp_hal(chipset); =20 let gsp_fw =3D KBox::pin_init(GspFirmware::new(dev, chipset, FIRMW= ARE_VERSION), GFP_KERNEL)?; =20 @@ -168,38 +54,21 @@ pub(crate) fn boot( =20 let wpr_meta =3D Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new= (&gsp_fw, &fb_layout))?; =20 - // FWSEC-FRTS is not executed on chips where the FRTS region size = is 0 (e.g. GA100). - if !fb_layout.frts.is_empty() { - Self::run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, &fb= _layout)?; - } - - gsp_falcon.reset(bar)?; - let libos_handle =3D self.libos.dma_handle(); - let (mbox0, mbox1) =3D gsp_falcon.boot( - bar, - Some(libos_handle as u32), - Some((libos_handle >> 32) as u32), - )?; - dev_dbg!(pdev, "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1); - - dev_dbg!( - pdev, - "Using SEC2 to load and run the booter_load firmware...\n" - ); - - BooterFirmware::new( + // Perform the chipset-specific boot sequence. + hal.boot( + &self, dev, - BooterKind::Loader, - chipset, - FIRMWARE_VERSION, - sec2_falcon, bar, - )? - .run(dev, bar, sec2_falcon, &wpr_meta)?; + chipset, + &fb_layout, + &wpr_meta, + gsp_falcon, + sec2_falcon, + )?; =20 gsp_falcon.write_os_version(bar, gsp_fw.bootloader.app_version); =20 - // Poll for RISC-V to become active before running sequencer + // Poll for RISC-V to become active before continuing. read_poll_timeout( || Ok(gsp_falcon.is_riscv_active(bar)), |val: &bool| *val, @@ -214,16 +83,7 @@ pub(crate) fn boot( self.cmdq .send_command_no_wait(bar, commands::SetRegistry::new())?; =20 - // Create and run the GSP sequencer. - let seq_params =3D GspSequencerParams { - bootloader_app_version: gsp_fw.bootloader.app_version, - libos_dma_handle: libos_handle, - gsp_falcon, - sec2_falcon, - dev, - bar, - }; - GspSequencer::run(&self.cmdq, seq_params)?; + hal.post_boot(&self, dev, bar, &gsp_fw, gsp_falcon, sec2_falcon)?; =20 // Wait until GSP is fully initialized. commands::wait_gsp_init_done(&self.cmdq)?; diff --git a/drivers/gpu/nova-core/gsp/hal.rs b/drivers/gpu/nova-core/gsp/h= al.rs new file mode 100644 index 000000000000..fb3edaeb3160 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/hal.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIA= TES. All rights reserved. + +mod gh100; +mod tu102; + +use kernel::prelude::*; + +use kernel::{ + device, + dma::Coherent, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp as GspEngine, + sec2::Sec2, + Falcon, // + }, + fb::FbLayout, + firmware::gsp::GspFirmware, + gpu::{ + Architecture, + Chipset, // + }, + gsp::{ + Gsp, + GspFwWprMeta, // + }, +}; + +/// Trait implemented by GSP HALs. +pub(super) trait GspHal: Send { + /// Performs the GSP boot process, loading and running the required fi= rmwares as needed. + #[allow(clippy::too_many_arguments)] + fn boot( + &self, + gsp: &Gsp, + dev: &device::Device, + bar: &Bar0, + chipset: Chipset, + fb_layout: &FbLayout, + wpr_meta: &Coherent, + gsp_falcon: &Falcon, + sec2_falcon: &Falcon, + ) -> Result; + + /// Performs HAL-specific post-GSP boot tasks. + /// + /// This method is called by the GSP boot code after the GSP is confir= med to be running, and + /// after the initialization commands have been pushed onto its queue. + fn post_boot( + &self, + _gsp: &Gsp, + _dev: &device::Device, + _bar: &Bar0, + _gsp_fw: &GspFirmware, + _gsp_falcon: &Falcon, + _sec2_falcon: &Falcon, + ) -> Result { + Ok(()) + } +} + +/// Returns the GSP HAL to be used for `chipset`. +pub(super) fn gsp_hal(chipset: Chipset) -> &'static dyn GspHal { + match chipset.arch() { + Architecture::Turing | Architecture::Ampere | Architecture::Ada = =3D> tu102::TU102_HAL, + Architecture::Hopper | Architecture::BlackwellGB10x | Architecture= ::BlackwellGB20x =3D> { + gh100::GH100_HAL + } + } +} diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core= /gsp/hal/gh100.rs new file mode 100644 index 000000000000..3f3675f9c16a --- /dev/null +++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIA= TES. All rights reserved. + +use kernel::prelude::*; + +use kernel::{ + device, + dma::Coherent, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp as GspEngine, + sec2::Sec2, + Falcon, // + }, + fb::FbLayout, + gpu::Chipset, + gsp::{ + hal::GspHal, + Gsp, + GspFwWprMeta, // + }, +}; + +struct Gh100; + +impl GspHal for Gh100 { + /// Boot GSP via FSP Chain of Trust (Hopper/Blackwell+ path). + /// + /// This path uses FSP to establish a chain of trust and boot GSP-FMC.= FSP handles + /// the GSP boot internally - no manual GSP reset/boot is needed. + fn boot( + &self, + _gsp: &Gsp, + _dev: &device::Device, + _bar: &Bar0, + _chipset: Chipset, + _fb_layout: &FbLayout, + _wpr_meta: &Coherent, + _gsp_falcon: &Falcon, + _sec2_falcon: &Falcon, + ) -> Result { + Err(ENOTSUPP) + } +} + +const GH100: Gh100 =3D Gh100; +pub(super) const GH100_HAL: &dyn GspHal =3D &GH100; diff --git a/drivers/gpu/nova-core/gsp/hal/tu102.rs b/drivers/gpu/nova-core= /gsp/hal/tu102.rs new file mode 100644 index 000000000000..a6f2b2e279e8 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/hal/tu102.rs @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIA= TES. All rights reserved. + +use kernel::prelude::*; + +use kernel::{ + device, + dma::Coherent, + io::Io, // +}; + +use crate::{ + driver::Bar0, + falcon::{ + gsp::Gsp as GspEngine, + sec2::Sec2, + Falcon, // + }, + fb::FbLayout, + firmware::{ + booter::{ + BooterFirmware, + BooterKind, // + }, + fwsec::{ + bootloader::FwsecFirmwareWithBl, + FwsecCommand, + FwsecFirmware, // + }, + gsp::GspFirmware, + FIRMWARE_VERSION, // + }, + gpu::Chipset, + gsp::{ + hal::GspHal, + sequencer::{ + GspSequencer, + GspSequencerParams, // + }, + Gsp, + GspFwWprMeta, // + }, + regs, + vbios::Vbios, // +}; + +/// Helper function to load and run the FWSEC-FRTS firmware and confirm th= at it has properly +/// created the WPR2 region. +fn run_fwsec_frts( + dev: &device::Device, + chipset: Chipset, + falcon: &Falcon, + bar: &Bar0, + bios: &Vbios, + fb_layout: &FbLayout, +) -> Result<()> { + // Check that the WPR2 region does not already exist - if it does, we = cannot run + // FWSEC-FRTS until the GPU is reset. + if bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound() !=3D 0 { + dev_err!( + dev, + "WPR2 region already exists - GPU needs to be reset to proceed= \n" + ); + return Err(EBUSY); + } + + // FWSEC-FRTS will create the WPR2 region. + let fwsec_frts =3D FwsecFirmware::new( + dev, + falcon, + bar, + bios, + FwsecCommand::Frts { + frts_addr: fb_layout.frts.start, + frts_size: fb_layout.frts.len(), + }, + )?; + + if chipset.needs_fwsec_bootloader() { + let fwsec_frts_bl =3D FwsecFirmwareWithBl::new(fwsec_frts, dev, ch= ipset)?; + // Load and run the bootloader, which will load FWSEC-FRTS and run= it. + fwsec_frts_bl.run(dev, falcon, bar)?; + } else { + // Load and run FWSEC-FRTS directly. + fwsec_frts.run(dev, falcon, bar)?; + } + + // SCRATCH_E contains the error code for FWSEC-FRTS. + let frts_status =3D bar + .read(regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR) + .frts_err_code(); + if frts_status !=3D 0 { + dev_err!( + dev, + "FWSEC-FRTS returned with error code {:#x}\n", + frts_status + ); + + return Err(EIO); + } + + // Check that the WPR2 region has been created as we requested. + let (wpr2_lo, wpr2_hi) =3D ( + bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO).lower_bound(), + bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI).higher_bound(), + ); + + match (wpr2_lo, wpr2_hi) { + (_, 0) =3D> { + dev_err!(dev, "WPR2 region not created after running FWSEC-FRT= S\n"); + + Err(EIO) + } + (wpr2_lo, _) if wpr2_lo !=3D fb_layout.frts.start =3D> { + dev_err!( + dev, + "WPR2 region created at unexpected address {:#x}; expected= {:#x}\n", + wpr2_lo, + fb_layout.frts.start, + ); + + Err(EIO) + } + (wpr2_lo, wpr2_hi) =3D> { + dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); + dev_dbg!(dev, "GPU instance built\n"); + + Ok(()) + } + } +} + +struct Tu102; + +impl GspHal for Tu102 { + fn boot( + &self, + gsp: &Gsp, + dev: &device::Device, + bar: &Bar0, + chipset: Chipset, + fb_layout: &FbLayout, + wpr_meta: &Coherent, + gsp_falcon: &Falcon, + sec2_falcon: &Falcon, + ) -> Result { + let bios =3D Vbios::new(dev, bar)?; + + // FWSEC-FRTS is not executed on chips where the FRTS region size = is 0 (e.g. GA100). + if !fb_layout.frts.is_empty() { + run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, fb_layout= )?; + } + + gsp_falcon.reset(bar)?; + let libos_handle =3D gsp.libos.dma_handle(); + let (mbox0, mbox1) =3D gsp_falcon.boot( + bar, + Some(libos_handle as u32), + Some((libos_handle >> 32) as u32), + )?; + dev_dbg!(dev, "GSP MBOX0: {:#x}, MBOX1: {:#x}\n", mbox0, mbox1); + + dev_dbg!( + dev, + "Using SEC2 to load and run the booter_load firmware...\n" + ); + + BooterFirmware::new( + dev, + BooterKind::Loader, + chipset, + FIRMWARE_VERSION, + sec2_falcon, + bar, + )? + .run(dev, bar, sec2_falcon, wpr_meta)?; + + Ok(()) + } + + fn post_boot( + &self, + gsp: &Gsp, + dev: &device::Device, + bar: &Bar0, + gsp_fw: &GspFirmware, + gsp_falcon: &Falcon, + sec2_falcon: &Falcon, + ) -> Result { + // Create and run the GSP sequencer. + let seq_params =3D GspSequencerParams { + bootloader_app_version: gsp_fw.bootloader.app_version, + libos_dma_handle: gsp.libos.dma_handle(), + gsp_falcon, + sec2_falcon, + dev, + bar, + }; + GspSequencer::run(&gsp.cmdq, seq_params)?; + + Ok(()) + } +} + +const TU102: Tu102 =3D Tu102; +pub(super) const TU102_HAL: &dyn GspHal =3D &TU102; --=20 2.54.0 From nobody Mon Jun 8 12:12:36 2026 Received: from CO1PR03CU002.outbound.protection.outlook.com (mail-westus2azon11010013.outbound.protection.outlook.com [52.101.46.13]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BAFFF3A0B36; Fri, 29 May 2026 07:34:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=fail smtp.client-ip=52.101.46.13 ARC-Seal: i=2; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1780040056; cv=fail; b=VcBiD5v7VC5cWFbn+6I2BA9fIWMGTZbbstgmF2cI0/YQTVXl9mnwfN3XgsupmjbJMiKcv5WnJAz5pgEm06YglbhDtUp99C589l07hsvVc79D8cz8vt+UVR2PbOvCainUj7uC3GOBi+gFO/mt+eouuMcMsDNTtdA5iYYD9qkDCkY= ARC-Message-Signature: i=2; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1780040056; c=relaxed/simple; bh=O373lh7+kAMjlmfPJ6frcU10RS7k/8wrgOF7Ni3WuUk=; h=From:Date:Subject:Content-Type:Message-Id:References:In-Reply-To: To:Cc:MIME-Version; b=MNwZzI11M2x3OTT8mqqjy1jEvEPudjF9IF6FhDhhUKjcCYMTO1ezATTmFzq7Q7qERFUd09wAvkhhuUd9DbynHLnipxZ/1S3W86UdxE12g7vx/boajxtmVtskVa+nLHT1qTPIwOR+vqfeAanB7e8dc4/TYmqf89iVb2FAatsYCug= ARC-Authentication-Results: i=2; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=nvidia.com; spf=fail smtp.mailfrom=nvidia.com; dkim=pass (2048-bit key) header.d=Nvidia.com header.i=@Nvidia.com header.b=VWn7rAKd; arc=fail smtp.client-ip=52.101.46.13 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=nvidia.com Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=nvidia.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=Nvidia.com header.i=@Nvidia.com header.b="VWn7rAKd" ARC-Seal: i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none; b=uUZTfTlBAXj4z3giog3ezHlX59eZVg5BbnmgRmFAOxqPk68lKndASdoBQ4194tQesFdQLVEtkd778obN/269DdGJl9AyAVtgbJzY43db7Co/Riz3kacFJBe6d8hFgtIHydeQjq1vVBK4/jND8xrJmE/6tFPyV+h8BqO6p8mRRPODM4JRYSVtwd0QpuGnt6y3Z2YcVQb1FTprLtCWnBFVAOxPLw141X5qX2RMa/DEepRw7XZ/LVRoRFrgK+KTqrTugK0zGI8U/xtapQItBWQ9ezURZrZJuEqeFRtzJCut7cPrCe+KGgL+djiABDODmYQltyUG9px42D2oYS8JSg/7Cg== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector10001; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=ZE+5ojSoUd1hFYGILpmlPDQC9m7/1quJg3LoZFDDmFg=; b=jZMcQwzSU+CuwBD2NRghrCYQTaQKLBKsN3/Nb1PBKLU1xBjRMCp1MyL1ae3Ow0T+yfXxqh1kwv4Zt9Dg61tIw3NqFXPzIahiiNP7OwAa+0ROkSpuCKYrFmIvxyOg3UBbPAybhpuZoJpcank8jvJn/YlKORJb8qvSy2Q3dLoLQkthhFROitlkGh42nIueI1D4rWUiVNmhSFdLNpHzopH9Fve1ZcaDYndji7c3TwvlNmRFc85DULLMVa6mqG6WZAzJc8vfCWlm6xy7CJbdDaxjm/V4KPvpyGNSjbE+t/i+TnL4ScjmLnu1qHi+JtV7AXUKPaODI/Z+Ayl8gywmhqesEQ== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=nvidia.com; dmarc=pass action=none header.from=nvidia.com; dkim=pass header.d=nvidia.com; arc=none DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=Nvidia.com; s=selector2; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=ZE+5ojSoUd1hFYGILpmlPDQC9m7/1quJg3LoZFDDmFg=; b=VWn7rAKd1WL/GrFzjgRuZYNkjMcATxgYHFudhgRub3GnGXkoBKGO0Lch8ZY5H/Q+aYGXlt9dDOugJOWOUylANkT5T8M5ZoJwR5JK/PyiUSN+6v4vP0x0/A1Yeam2AGzkS/1fxJR77o290FEVDis1nZgRKUYY5he2UdsUDisEIHG5/T7m45QNIK18xnLYt4v5XZ92pmh48a0C4zzqZ5triV9dN0rYaT9gK1aHRK20U2fimMgIZiG5U1KuNnhz46dAA0yGLYimrYhLxtOsU9ECjacuhOc3776hDQz5R385bm8zKj+UTxqoapdl2FfVhzU0brtL10LhFbf94oYBhuG3lQ== Authentication-Results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=nvidia.com; Received: from CH2PR12MB3990.namprd12.prod.outlook.com (2603:10b6:610:28::18) by MN2PR12MB4237.namprd12.prod.outlook.com (2603:10b6:208:1d6::7) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.21.48.19; Fri, 29 May 2026 07:34:08 +0000 Received: from CH2PR12MB3990.namprd12.prod.outlook.com ([fe80::7de1:4fe5:8ead:5989]) by CH2PR12MB3990.namprd12.prod.outlook.com ([fe80::7de1:4fe5:8ead:5989%4]) with mapi id 15.21.0071.010; Fri, 29 May 2026 07:34:08 +0000 From: Alexandre Courbot Date: Fri, 29 May 2026 16:33:42 +0900 Subject: [PATCH v7 2/4] gpu: nova-core: send UNLOADING_GUEST_DRIVER GSP command upon unloading Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260529-nova-unload-v7-2-678f39209e00@nvidia.com> References: <20260529-nova-unload-v7-0-678f39209e00@nvidia.com> In-Reply-To: <20260529-nova-unload-v7-0-678f39209e00@nvidia.com> To: Danilo Krummrich , Alice Ryhl , David Airlie , Simona Vetter Cc: John Hubbard , Alistair Popple , Timur Tabi , Eliot Courtney , nova-gpu@lists.linux.dev, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org, Alexandre Courbot X-Mailer: b4 0.15.2 X-ClientProxiedBy: TYCP286CA0325.JPNP286.PROD.OUTLOOK.COM (2603:1096:400:3b7::8) To CH2PR12MB3990.namprd12.prod.outlook.com (2603:10b6:610:28::18) Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-MS-PublicTrafficType: Email X-MS-TrafficTypeDiagnostic: CH2PR12MB3990:EE_|MN2PR12MB4237:EE_ X-MS-Office365-Filtering-Correlation-Id: 61f89f2e-1ee7-4b96-e08a-08debd54ab51 X-MS-Exchange-SenderADCheck: 1 X-MS-Exchange-AntiSpam-Relay: 0 X-Microsoft-Antispam: BCL:0;ARA:13230040|366016|1800799024|376014|10070799003|18002099003|22082099003|11063799006|56012099006|3023799007|6133799003; X-Microsoft-Antispam-Message-Info: NIHooNwPiivn+VewCo5agcyMRoCYQkceOcudUs1itAaZf+TEykNQnRZLBu4Qwq2HuoZCWPUVGQC0cHyJZsXVH1MKTO+4J/kpuSNy5heDBLXMt4M8vG0PA5ydoMtzhYwhm9LVtEHbCvnAVI/kgwoSeUKayjSGrCEggW9v2r1qaBzFZNNjZw6X9mRqUx/eo3Xmf9flIQkFX/uxIXR4qzl6zLX1Fc58t9+hImeZ80PQ2RO/9nyJxIpxqvNFt6s/G3GI0+OEeUIPhuq13wkyFz5m4B9I4oILITZwm0AQ/L+y+Yb3H4ax93KbGblmbjyKJkp3+ogfy3WUU2+1y7LlmVFuPBMt7PURQo5tqW7dyz0JoAucDGRk0mjetBQ6aeoY7JN5UVZeTjN0TFoXW/qW34wRXbbrn0owR6SVfdHo/5AdS/dIz6TVHcq/qt+pdjzm5t/kVt49/toF5b9EPjxytAQOl0j9FVF/l8jOl166LhkHHv0NdhLbmZ4e9gmWY9vlTe/q70Ho9YtdMJ7oNISCyIUCG8IF7ZBtuILECFpq74/tEJ9C6xk7eYfgT1iHaJXaDKUQAaZnP75BM8z9dPelMIn8fNitQ6lu9i0PuCjut8jXm+dK3jxV362Oc/xP+wKNdsSu0KqmOUi4M7MS8ut4Ho3NbTHAwiEUi7T/LyUmEWvqHU2VzYco8KTUSTaVoX9ImMq0 X-Forefront-Antispam-Report: CIP:255.255.255.255;CTRY:;LANG:en;SCL:1;SRV:;IPV:NLI;SFV:NSPM;H:CH2PR12MB3990.namprd12.prod.outlook.com;PTR:;CAT:NONE;SFS:(13230040)(366016)(1800799024)(376014)(10070799003)(18002099003)(22082099003)(11063799006)(56012099006)(3023799007)(6133799003);DIR:OUT;SFP:1101; X-MS-Exchange-AntiSpam-MessageData-ChunkCount: 2 X-MS-Exchange-AntiSpam-MessageData-0: =?utf-8?B?WUNJUWlLSitQN1IxNkREeUYvUURnS3lHcnRWVDJZckhlUkgwMFdRUHFlb1Rk?= =?utf-8?B?Qkh6VjVvbnlEWHpxa0g0UE01dFVRWnM4RURnZkFTUnBhSjJBcmxWMzZZWTZ0?= =?utf-8?B?RktnUXk4cFdDQ3czWUlBV2ttUmNxS0NzNG5PQ1pVbzRvZHBZTmJnbis5cXNL?= =?utf-8?B?WmpXdGI0MEVZY0d4aER2d1RNV0lrZEQ1TjBlditjbC9DSEhjN215em5jSkF6?= =?utf-8?B?dmZsd2x4Tkl5akJkdk13QzZSQjZMY0RXM3pjUVdVZm5reUNFRmYxUnpXcDZj?= =?utf-8?B?Q3krY2pOTUZCbkRmdjYzMlpkSWViNktFZFRyYWJQeEtYbEZyVlNNZ3g2QTJi?= =?utf-8?B?MnJHMW5oS2lsYzFObUNTQ2JRWldET1VVQzVwMVREUGhmeVJiV2NIM3UxSm1v?= =?utf-8?B?UFFNMzNoSnY4dnI1UFVZUmVvOXlQN2xic0NQT2M2Sys1YjBnQWc2dDdOdDRV?= =?utf-8?B?cVA1NHVxVUFaMVZxMnMxOU1yUFYrUXUxOTZGQ2drMHZKSVFhUjdDd2JpQzBB?= =?utf-8?B?UjJIS2JTZVpDdWZqbEJtNjJKUSt4WFQvTHI0NnI5V1F4ZVA3QXBaejNmQlc0?= =?utf-8?B?QklhbUlRT0l0WmcxdkxzNGl6eXNzdHFkR0hNeWF6aGhYRVJWTE1acmlPcnla?= =?utf-8?B?K0ZDdjdRNnc3bXZSeWpMRGNwZnB0SmI1OXMrb1BydXI1MVA3MEo1a1hKNUIy?= =?utf-8?B?T3dueGdNTEpGSTZTeVo4b2ZzS0doUGhrKzBnNjNrYXZ5eEZtOTBhMHZrd0w1?= =?utf-8?B?NGJkUUFKWnZTVE1MM2tYa0xoUUJGNkFudWlXdmUxNWt5QTZTbFc5VVhYTkFr?= =?utf-8?B?Ym9kWG1saE1uN3VmVFAwNEhDUUhNRUxMbDBodzROK2h5OW00eVNyMFRWWU5u?= =?utf-8?B?NTJMc2wyN0ZGY25USFlsV216cS9XQ0d1M05hUlZ6N2padURLaTZ3VlQ2cXVP?= =?utf-8?B?b1JxMm04ZmtDUXNwazVuQ3ZXRWQrQjhSZzhJMVNVWkkyY05kZTRRSFpmeHZn?= =?utf-8?B?bk85OW5mTlpBejA1d0R0VWVpOHIzbGI4NGt4Tzh1NkhmTlU0UCt4UmIvNG1u?= =?utf-8?B?RXViaElTOWc1SmtYdjA4aFlwYjdsK1pzSEQxYWYwNFhZU3VYZFJHRWFGaXFF?= =?utf-8?B?cThDcjU5S01JT3FZWk1pSmhTOFhGc3V4WUhzYWJoS1p1dlZPT0dYMVdyR1hS?= =?utf-8?B?QkhRcW1oVDZ0NkhoZmhYMnJoVElaak5wNEoxejlkY2Uzemg3bFhCbm12TFhu?= =?utf-8?B?UHB2MmxyblZwWlIzNzdMdzBlbDFIMEdJY1lmY0M1Nnl2WENvNmNsODlCU01D?= =?utf-8?B?dkE2RDdja1diRWY5OTZ4NERuL1IrT204MG1SVUllYlJ2OTA2b2pjRmQwVDlh?= =?utf-8?B?c0JQY0NFODBUNGZSU3IzZ2o0MDBZRytxZjVOTStybXFoeEFSZHVRK3F4MXZG?= =?utf-8?B?RXgwV2JlVkJIS0VqaEpPMmdBanVGOVZGdnBNUWhuL0hWaFk3cHVqRkxQc3Bz?= =?utf-8?B?U1MwcnRHME9zbWZMcEs5YWt6RHpEOWxLVGMrd2MrSkJFOWg2STJjeFBqcFMv?= =?utf-8?B?eTR1NWlFUEZDTmErNC9zUVZCd0g5eUxuaG1DcDBKRkZlY0h2QVg4VzJlYjU2?= =?utf-8?B?aXpnTzZxbzNOblRTa3lxdDdjVFhvdDNiSUJhQmQrMHFVTXE5bFU2My9kYWgy?= =?utf-8?B?YnVIb0ZXMEZrNFJ6Y09YQm9oa01rMktIU1pLWGpGb3ZWbysrMStBNVJNcnI4?= =?utf-8?B?S3UyZGlBNWpZRW8zTFFibDJzWkVBMm5qTzd4ZFJSWWdRRm1DNFVhcmZ4c0F4?= =?utf-8?B?a21MMmR2ejRaaHZHL1d1bHU3T0g1Z09Cb0tmdzJZbkYrMzg1MFJkWTJIUXc5?= =?utf-8?B?V1BSRVNCTlJ0ZGUrVG1Dd0E0b2hSK3ViOUhaT05udmJKL1ZBWHpvTXUzQ3pm?= =?utf-8?B?L25BYW1yaEJtWnRVMGpsSnZBRFpmOHJjbUNRT1lzQ2ZZNEtCanZqZGtMZ2FX?= =?utf-8?B?RVpnQUhydWJxWnpFV3hoSEN2TUdTM0JEOWxUTXJZQ2h2a3FDOUhLakxNK1gw?= =?utf-8?B?NlpEaFVkVTBSNkErZjhtRmtsNTJNZ0hTSFBab2pLdmoreWpLaWlub2lUY1pG?= =?utf-8?B?MWc1RFRPQkJDb20yeHRaWFduUE15aU42RGFhazFIb3dMWk5LcDJEWnFlN21Y?= =?utf-8?B?MEE1UlpIK2dsb3RkN2orNFhFWndrT05jd2lCUGJLQjZPSlNtdEtWUVhzekhO?= =?utf-8?B?NnBueTVySmY1YWViMmZDaWhvejBFUFFZSXJpMnU4eDhwK0FLT2N1WFREdkZZ?= =?utf-8?B?WmFzZW1nbzMzZFpQNDF0VHVKNmUzS0JhYnRJbUc4bXpxZDdvK2N2MlVMeWpK?= =?utf-8?Q?3ERIGQZ6kRDW1WNyF1iDQSUgb1jZt3RQkpj91DXigJ6/l?= X-MS-Exchange-AntiSpam-MessageData-1: hRritfizi5r+pA== X-OriginatorOrg: Nvidia.com X-MS-Exchange-CrossTenant-Network-Message-Id: 61f89f2e-1ee7-4b96-e08a-08debd54ab51 X-MS-Exchange-CrossTenant-AuthSource: CH2PR12MB3990.namprd12.prod.outlook.com X-MS-Exchange-CrossTenant-AuthAs: Internal X-MS-Exchange-CrossTenant-OriginalArrivalTime: 29 May 2026 07:34:08.4014 (UTC) X-MS-Exchange-CrossTenant-FromEntityHeader: Hosted X-MS-Exchange-CrossTenant-Id: 43083d15-7273-40c1-b7db-39efd9ccc17a X-MS-Exchange-CrossTenant-MailboxType: HOSTED X-MS-Exchange-CrossTenant-UserPrincipalName: JGEaX7FrCTshZWU0xgxoMCRR6WcZmE2lDpxG3x9hjlhr5kYNsw+ej+aQu1DUOJ9uD0xqXdA0/708xcKio27Xsg== X-MS-Exchange-Transport-CrossTenantHeadersStamped: MN2PR12MB4237 Currently, the GSP is left running after the driver is unbound. This is not great for several reasons, notably that it can still access shared memory areas that the kernel will now reclaim (especially problematic on setups without an IOMMU). Fix this by sending the `UNLOADING_GUEST_DRIVER` GSP command when the `Gpu` is dropped. This stops the GSP and lets us proceed with the rest of the unbind sequence in a later patch. Reviewed-by: Eliot Courtney Co-developed-by: Eliot Courtney Signed-off-by: Eliot Courtney Signed-off-by: Alexandre Courbot Reviewed-by: Danilo Krummrich --- drivers/gpu/nova-core/gpu.rs | 21 ++++++++++- drivers/gpu/nova-core/gsp/boot.rs | 45 +++++++++++++++++++= ++++ drivers/gpu/nova-core/gsp/commands.rs | 43 +++++++++++++++++++= +++ drivers/gpu/nova-core/gsp/fw.rs | 4 ++ drivers/gpu/nova-core/gsp/fw/commands.rs | 45 +++++++++++++++++++= ++++ drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs | 11 ++++++ 6 files changed, 168 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index cf134cab49cd..011d504830e4 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -243,8 +243,10 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Resu= lt { } =20 /// Structure holding the resources required to operate the GPU. -#[pin_data] +#[pin_data(PinnedDrop)] pub(crate) struct Gpu<'gpu> { + /// Device owning the GPU. + device: &'gpu device::Device, spec: Spec, /// MMIO mapping of PCI BAR 0. bar: &'gpu Bar0, @@ -266,6 +268,7 @@ pub(crate) fn new( bar: &'gpu Bar0, ) -> impl PinInit + 'gpu { try_pin_init!(Self { + device: pdev.as_ref(), spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| { dev_info!(pdev,"NVIDIA ({})\n", spec); })?, @@ -294,3 +297,19 @@ pub(crate) fn new( }) } } + +#[pinned_drop] +impl PinnedDrop for Gpu<'_> { + fn drop(self: Pin<&mut Self>) { + let this =3D self.project(); + let device =3D *this.device; + let bar =3D *this.bar; + + let _ =3D this + .gsp + .as_ref() + .get_ref() + .unload(device, bar, &*this.gsp_falcon) + .inspect_err(|e| dev_err!(device, "failed to unload GSP: {:?}\= n", e)); + } +} diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/= boot.rs index 1bd9f21fc443..adc66809ce83 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AF= FILIATES. All rights reserved. =20 use kernel::{ + bits, device, dma::Coherent, io::poll::read_poll_timeout, @@ -23,6 +25,7 @@ }, gpu::Chipset, gsp::{ + cmdq::Cmdq, commands, GspFwWprMeta, // }, @@ -97,4 +100,46 @@ pub(crate) fn boot( =20 Ok(()) } + + /// Shut down the GSP and wait until it is offline. + fn shutdown_gsp( + cmdq: &Cmdq, + bar: &Bar0, + gsp_falcon: &Falcon, + mode: commands::PowerStateLevel, + ) -> Result<()> { + // Command to shut the GSP down. + cmdq.send_command(bar, commands::UnloadingGuestDriver::new(mode))?; + + // Wait until GSP signals it is suspended. + const LIBOS_INTERRUPT_PROCESSOR_SUSPENDED: u32 =3D bits::bit_u32(3= 1); + read_poll_timeout( + || Ok(gsp_falcon.read_mailbox0(bar)), + |&mb0| mb0 & LIBOS_INTERRUPT_PROCESSOR_SUSPENDED !=3D 0, + Delta::from_millis(10), + Delta::from_secs(5), + ) + .map(|_| ()) + } + + /// Attempts to unload the GSP firmware. + /// + /// This stops all activity on the GSP. + pub(crate) fn unload( + &self, + dev: &device::Device, + bar: &Bar0, + gsp_falcon: &Falcon, + ) -> Result { + // Shut down the GSP. + Self::shutdown_gsp( + &self.cmdq, + bar, + gsp_falcon, + commands::PowerStateLevel::Level0, + ) + .inspect_err(|e| dev_err!(dev, "Unload guest driver failed: {:?}\n= ", e))?; + + Ok(()) + } } diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/= gsp/commands.rs index ac9cef312b10..3a365455d10c 100644 --- a/drivers/gpu/nova-core/gsp/commands.rs +++ b/drivers/gpu/nova-core/gsp/commands.rs @@ -232,3 +232,46 @@ pub(crate) fn gpu_name(&self) -> core::result::Result<= &str, GpuNameError> { .map_err(GpuNameError::InvalidUtf8) } } + +pub(crate) use fw::commands::PowerStateLevel; + +/// The `UnloadingGuestDriver` command, used to shut down the GSP. +/// +/// Only used within the `gsp` module. +pub(super) struct UnloadingGuestDriver { + level: PowerStateLevel, +} + +impl UnloadingGuestDriver { + /// Creates a new `UnloadingGuestDriver` command for the given [`Power= StateLevel`]. + pub(super) fn new(level: PowerStateLevel) -> Self { + Self { level } + } +} + +impl CommandToGsp for UnloadingGuestDriver { + const FUNCTION: MsgFunction =3D MsgFunction::UnloadingGuestDriver; + type Command =3D fw::commands::UnloadingGuestDriver; + type Reply =3D UnloadingGuestDriverReply; + type InitError =3D Infallible; + + fn init(&self) -> impl Init { + fw::commands::UnloadingGuestDriver::new(self.level) + } +} + +/// The reply from the GSP to the [`UnloadingGuestDriver`] command. +pub(super) struct UnloadingGuestDriverReply; + +impl MessageFromGsp for UnloadingGuestDriverReply { + const FUNCTION: MsgFunction =3D MsgFunction::UnloadingGuestDriver; + type InitError =3D Infallible; + type Message =3D (); + + fn read( + _msg: &Self::Message, + _sbuffer: &mut SBufferIter>, + ) -> Result { + Ok(UnloadingGuestDriverReply) + } +} diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw= .rs index 3245793bbe42..33c9f5860771 100644 --- a/drivers/gpu/nova-core/gsp/fw.rs +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -279,6 +279,7 @@ pub(crate) enum MsgFunction { Nop =3D bindings::NV_VGPU_MSG_FUNCTION_NOP, SetGuestSystemInfo =3D bindings::NV_VGPU_MSG_FUNCTION_SET_GUEST_SYSTEM= _INFO, SetRegistry =3D bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY, + UnloadingGuestDriver =3D bindings::NV_VGPU_MSG_FUNCTION_UNLOADING_GUES= T_DRIVER, =20 // Event codes GspInitDone =3D bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE, @@ -323,6 +324,9 @@ fn try_from(value: u32) -> Result { Ok(MsgFunction::SetGuestSystemInfo) } bindings::NV_VGPU_MSG_FUNCTION_SET_REGISTRY =3D> Ok(MsgFunctio= n::SetRegistry), + bindings::NV_VGPU_MSG_FUNCTION_UNLOADING_GUEST_DRIVER =3D> { + Ok(MsgFunction::UnloadingGuestDriver) + } =20 // Event codes bindings::NV_VGPU_MSG_EVENT_GSP_INIT_DONE =3D> Ok(MsgFunction:= :GspInitDone), diff --git a/drivers/gpu/nova-core/gsp/fw/commands.rs b/drivers/gpu/nova-co= re/gsp/fw/commands.rs index db46276430be..42985d446bae 100644 --- a/drivers/gpu/nova-core/gsp/fw/commands.rs +++ b/drivers/gpu/nova-core/gsp/fw/commands.rs @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AF= FILIATES. All rights reserved. =20 use kernel::{ device, @@ -129,3 +130,47 @@ unsafe impl AsBytes for GspStaticConfigInfo {} // SAFETY: This struct only contains integer types for which all bit patte= rns // are valid. unsafe impl FromBytes for GspStaticConfigInfo {} + +/// Power level requested to the [`UnloadingGuestDriver`] command. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u32)] +#[expect(unused)] +pub(crate) enum PowerStateLevel { + /// Full unload. + Level0 =3D bindings::NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_0, + /// S3 (suspend to RAM). + Level3 =3D bindings::NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_3, + /// Hibernate (suspend to disk). + Level7 =3D bindings::NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_7, +} + +impl PowerStateLevel { + /// Returns `true` if this state represents a power management transit= ion, i.e. some GPU state + /// must survive it (as opposed to a full unload). + pub(crate) fn is_power_transition(self) -> bool { + self !=3D PowerStateLevel::Level0 + } +} + +/// Payload of the `UnloadingGuestDriver` command and message. +#[repr(transparent)] +#[derive(Clone, Copy, Debug, Zeroable)] +pub(crate) struct UnloadingGuestDriver(bindings::rpc_unloading_guest_drive= r_v1F_07); + +impl UnloadingGuestDriver { + pub(crate) fn new(level: PowerStateLevel) -> Self { + Self(bindings::rpc_unloading_guest_driver_v1F_07 { + bInPMTransition: u8::from(level.is_power_transition()), + bGc6Entering: 0, + newLevel: level as u32, + ..Zeroable::zeroed() + }) + } +} + +// SAFETY: Padding is explicit and will not contain uninitialized data. +unsafe impl AsBytes for UnloadingGuestDriver {} + +// SAFETY: This struct only contains integer types for which all bit patte= rns +// are valid. +unsafe impl FromBytes for UnloadingGuestDriver {} diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gp= u/nova-core/gsp/fw/r570_144/bindings.rs index 334e8be5fde8..f82ed097b283 100644 --- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -30,6 +30,9 @@ fn fmt(&self, fmt: &mut ::core::fmt::Formatter<'_>) -> ::= core::fmt::Result { fmt.write_str("__IncompleteArrayField") } } +pub const NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_0: u32 =3D 0; +pub const NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_3: u32 =3D 3; +pub const NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_7: u32 =3D 7; pub const NV_VGPU_MSG_SIGNATURE_VALID: u32 =3D 1129337430; pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS2: u32 =3D 0; pub const GSP_FW_HEAP_PARAM_OS_SIZE_LIBOS3_BAREMETAL: u32 =3D 23068672; @@ -880,6 +883,14 @@ fn default() -> Self { } } #[repr(C)] +#[derive(Debug, Default, Copy, Clone, MaybeZeroable)] +pub struct rpc_unloading_guest_driver_v1F_07 { + pub bInPMTransition: u8_, + pub bGc6Entering: u8_, + pub __bindgen_padding_0: [u8; 2usize], + pub newLevel: u32_, +} +#[repr(C)] #[derive(Debug, Default, MaybeZeroable)] pub struct rpc_run_cpu_sequencer_v17_00 { pub bufferSizeDWord: u32_, --=20 2.54.0 From nobody Mon Jun 8 12:12:36 2026 Received: from CO1PR03CU002.outbound.protection.outlook.com (mail-westus2azon11010013.outbound.protection.outlook.com [52.101.46.13]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7FE6A3ACF0F; Fri, 29 May 2026 07:34:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=fail smtp.client-ip=52.101.46.13 ARC-Seal: i=2; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1780040060; cv=fail; b=esFb64C2HQwb17AJWIj3cbqGfpZRe9MP79mq15CxbtiEM2OgApqQn7J64m+GWQFyoY/AlJM6p5g2yJXv+d9Z/6NCE6Kf0Ss01IorS5UPETk8oFSWO1IKrUsYlEg9SZTBiGRra3yqXHBlBPrbiQzl8o+zBF81oeXMMqmMG8bKJA8= ARC-Message-Signature: i=2; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1780040060; c=relaxed/simple; bh=XU7SoRDMkSNzcwwA7yCbNrDJ5rWpNuZbAVBuxvQQxyo=; h=From:Date:Subject:Content-Type:Message-Id:References:In-Reply-To: To:Cc:MIME-Version; b=jF5Pe3eANmqaNQ22lPlQ5QFEAb2bV8LPSCqBDFzv0KWR88W27vtJqnc5N9CGphBeX/H4fU1kVT9JVJ7EmTVtOpr8MbNzs+S06fmcjA+mrsOTeaJpviOh3PzBYCFm2ckIMiGyrmQLtAJ2a+P/qhaaf+NugbZb3/yjacZoBnxFmfc= ARC-Authentication-Results: i=2; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=nvidia.com; spf=fail smtp.mailfrom=nvidia.com; dkim=pass (2048-bit key) header.d=Nvidia.com header.i=@Nvidia.com header.b=SKjKB4CV; arc=fail smtp.client-ip=52.101.46.13 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=nvidia.com Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=nvidia.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=Nvidia.com header.i=@Nvidia.com header.b="SKjKB4CV" ARC-Seal: i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none; b=CGmmo4+SZma912AX5p4EgdIA9uMjbGeS2fi6i0VGnCkeP6eLhT89c0lB+hvn2AwWltqOjowu4gF9p6u+zpI51fl67+g1kNromn4VaPym1y5lpqtITmCa/lX1prdgWTCLFQqMy4deOtLyTGwsA4LouDZid/dvX5jFEYtKuGQyY7O76R3dY2qILeujC8Uxgr/vcehTfx4htfEo7r+cxz+hszP/hQ2S1xBdVgYK1447bXTOkdyTDHEXOq8rxeXWHjmkB3h+bVG3zkvd29muIOF5RvWUotJ4rXdHmERoaWOWJ4qIWbKFTA6fHWTPUZBjkWzokNigdGmOxjXH9Az+ZLRjiA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector10001; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=NriK2fzbsKsB4meMDOdhnip5A5pRc9DXh76TGL5w0Y8=; b=OoIFvhHQwyNEwuuuslTPo4wcRzCOSyQ8/okzUEfaWHWlFj1qVqOA3Ckk3tzqoq7+HQRj2hcjiXhaTBGWmFfCGkDDehTOYtfB9Sue47rIxRMqO0anqoI6DtS6nzz3qGHAQ2uZ+l+38UGfDCZJoq9acDAFUIEkPgmqtpx7laRLMbAM1fB0A508zA+EHIWKbYUYeaYjcV//EeDJaH/o+2uqkTbeP2zyZH2B4wC2XG0fjN/E7/8w75RVG/Xxt8QUvLx4KCcYJfM47JL5zRl33AQ3pKaaILrk14SYqll3rvegll1QMiI2cHlWJzUXvDOFvnQ1seSpPG9hSi/WJowb0y946g== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=nvidia.com; dmarc=pass action=none header.from=nvidia.com; dkim=pass header.d=nvidia.com; arc=none DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=Nvidia.com; s=selector2; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=NriK2fzbsKsB4meMDOdhnip5A5pRc9DXh76TGL5w0Y8=; b=SKjKB4CVnsEkwqVlryjN+iRzJLwxKxwfUVowdCXSf69qSmnEHCW0ASd5SVdn0qxpuTD9FY7gRXnl6g/ZHmOIxQYDXBGhcmwOaV+rLaAGH27B+YKTkk5a4c9Wdlfv4BaZ/O/9Dkci8q00GSa5TDzTmoFq0Lzky2I0VXVYjH0CGWybHhlieEm8Fcfx2R/wAMeKqtub4FZAHsSNOMfpPhq1zpJc2cN+0zSjMAr34Ej+b9DRduorgqAG7MPJqbqhja0Ykbaq9e9qo0PAnjNPA6nmKywAHr/z8XF/MIVkm+IEkuVEW28py3O9yPmXKL/itaAcfLayeod2cslSZSix4SG8KQ== Authentication-Results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=nvidia.com; Received: from CH2PR12MB3990.namprd12.prod.outlook.com (2603:10b6:610:28::18) by MN2PR12MB4237.namprd12.prod.outlook.com (2603:10b6:208:1d6::7) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.21.48.19; Fri, 29 May 2026 07:34:12 +0000 Received: from CH2PR12MB3990.namprd12.prod.outlook.com ([fe80::7de1:4fe5:8ead:5989]) by CH2PR12MB3990.namprd12.prod.outlook.com ([fe80::7de1:4fe5:8ead:5989%4]) with mapi id 15.21.0071.010; Fri, 29 May 2026 07:34:12 +0000 From: Alexandre Courbot Date: Fri, 29 May 2026 16:33:43 +0900 Subject: [PATCH v7 3/4] gpu: nova-core: run Booter Unloader and FWSEC-SB upon unbinding Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260529-nova-unload-v7-3-678f39209e00@nvidia.com> References: <20260529-nova-unload-v7-0-678f39209e00@nvidia.com> In-Reply-To: <20260529-nova-unload-v7-0-678f39209e00@nvidia.com> To: Danilo Krummrich , Alice Ryhl , David Airlie , Simona Vetter Cc: John Hubbard , Alistair Popple , Timur Tabi , Eliot Courtney , nova-gpu@lists.linux.dev, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org, Alexandre Courbot X-Mailer: b4 0.15.2 X-ClientProxiedBy: TYCP286CA0128.JPNP286.PROD.OUTLOOK.COM (2603:1096:400:2b6::18) To CH2PR12MB3990.namprd12.prod.outlook.com (2603:10b6:610:28::18) Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-MS-PublicTrafficType: Email X-MS-TrafficTypeDiagnostic: CH2PR12MB3990:EE_|MN2PR12MB4237:EE_ X-MS-Office365-Filtering-Correlation-Id: cc13365d-e64f-49c0-c384-08debd54ad9a X-MS-Exchange-SenderADCheck: 1 X-MS-Exchange-AntiSpam-Relay: 0 X-Microsoft-Antispam: BCL:0;ARA:13230040|366016|1800799024|376014|10070799003|18002099003|22082099003|11063799006|56012099006|3023799007|6133799003; X-Microsoft-Antispam-Message-Info: CpxgU4nnkaXAcey8ll9/p9cxfowBbD4LTeFsG9hewvJTIiJa0AmKeu5vVvblHDaGes89AiHaXkhfPsL9Arp3rbn4HZSeY+nsKnXQoPn91ESebpv1I5caR3lRdVx+zRF1HTsMx9SxJdLjjTg32usXnmEcU5d2Ih5tOk68N5WKr6aeukc0aPUrquxo/NCsMYrAyLHEJyTDheamHdJQEY9iTQRKsCgFlMKeu7xmC6gsmUHAWFp0geXOeBBegXcZVnbCmTSu61rpy7CfscspcRPWjlSCbb1aEBJyKd9jaH9q5mLWAHoCzOTxH4qnpf2V3Pi7Q+PZlfqLM7h0T0xBm/TvTDsb8XXYMjpfUQE+k2zY+G1CPrMaDjgFqr8mR8XyCXYq9Oi0Pzrml7OmyjVjkxr8tGr+reJsm8BzyFZeuO/aAIgkBwZSDs6qbJAPvdBc8Hq16HJqjbSDthJDd9AFFB/5crcW+YLFdirgSoVPpQwNO6emwjLRww7vfgJF2TCyP7Y7Z6sNf5brybZ4F0JVRqhFrC78iHtCNnQi9S9TYRK7npS0O4j6tO5yXbs6uLb/ph2+WfEegzTV9K5+Rhh5PKA3MaaUY77hbVzDPpPnKD23INyomRsPNu277clPo6aj67pmYyOr65835adyGSpnYsA/PPGTyg1QNoT0JLmEZ0yNeQ9S7J8T93KboCLYXyex697I X-Forefront-Antispam-Report: CIP:255.255.255.255;CTRY:;LANG:en;SCL:1;SRV:;IPV:NLI;SFV:NSPM;H:CH2PR12MB3990.namprd12.prod.outlook.com;PTR:;CAT:NONE;SFS:(13230040)(366016)(1800799024)(376014)(10070799003)(18002099003)(22082099003)(11063799006)(56012099006)(3023799007)(6133799003);DIR:OUT;SFP:1101; X-MS-Exchange-AntiSpam-MessageData-ChunkCount: 2 X-MS-Exchange-AntiSpam-MessageData-0: =?utf-8?B?djdQT2VnN3ZNOC96ckttMG5sRVVhZWlldXk4S3pDT1lKSE1KL3dqRUc0bkh6?= =?utf-8?B?UEZreGN3MWhCbVowTjlKSHQ5WXpzYTY1QTdWSFRkUHd5dURpdFBYV0dvT0xo?= =?utf-8?B?RnJNeUVRMjc5ay8ycFJoNmNhVlZ3S3pHdUJOR1hESVJsWUl1L2ZpNnUzczRh?= =?utf-8?B?MndOekx4cEJRdlZMRFVqdjZhQTVaL2ZzREg1ZHZPMWV6NVcyUHdnZGxTQ1F2?= =?utf-8?B?ZVU3cC8rdDVmYVlJeGJ0UmNqb1dUWUt3em43U2hRNEo1dWVSZzR4eXBnb0NF?= =?utf-8?B?amxGa1QxUU9jdkRUd0hwei9kS3ZDYWwrUTFMSy9jcU1sUnhMUGVZMzZ1MVVl?= =?utf-8?B?Q2R3Ni8yWDQzNVp5Ujh4WU9RNjlybmJCQ2RNbUN4Q2RUUW11LzZBTHpyMmJx?= =?utf-8?B?MUl5Rit6MDJvU1JaZmpaUlNpVWJCbFFHL2xsR3hwUFIyNkwwM1phWkNXSUYx?= =?utf-8?B?NldRdWRvRDd1aGE2Q053bmFIVE5US0NBeEJOOUdYSGwxOFduTmpSY0RaSVJL?= =?utf-8?B?cnU3aW1xaGV5T2pJZVRNZjB3ckRkcmcwanJJci84RFdaNGFRNnFFV2N4bTR4?= =?utf-8?B?aEQ2dWJIN1BNWVp1TmRqVWh5Q0NCdG4zeDRwL0duMThrVXpRdmtSa1MvcEhE?= =?utf-8?B?Mlk2Z3RzbDZ1aFllKzFnRjhSd09sUEtjV2UzenQzM3pyMkdyR3pZd2JwdGl3?= =?utf-8?B?LytlYmVjRis3ZENveFk2RkVhOG01M3pZZEl4UVJTdVkwV1FkNzhuZWQ5REVS?= =?utf-8?B?ZTk2SFhpaUM1ZEtvdkdOb0t5TzZzekZJMGF2eG0rSkRudVdIbTZhU0thSnRC?= =?utf-8?B?MlhENDBLc1NNL292bXhOSnA5ZEk5WWYrdDd4eENHcVNaR2FsalpKdHNrQzg4?= =?utf-8?B?cjZFVytrT1RJMWxUT2NKc1QzSy9DNUVhRE9tWEZobVl1eDVXdzYvdkU3cko5?= =?utf-8?B?ZGUwcU1YMWFXV3BrazBLbGt2VHM0NDZqa0puWFlqSzFQbEQ4SWdqSHpHWXEw?= =?utf-8?B?RnBSbVJWT082djJGV09NR1hqUGlQZ01DWUZFYkpoR0hMdGlsSThGOUR2bnh1?= =?utf-8?B?aFQ4eUFPcU13TTVIMGY2ZkRXZ3ZzL3FWcmJ3OGRKWHpuM2lrS0hXYlBrYkhU?= =?utf-8?B?TXFNNlozdHdUU0REWGZ2aVNsU0NhZktPQXR0aFp5OHR5YkZOYU1wTlRQVGdD?= =?utf-8?B?aHUwY1V5U0tLY1BZZzJ6RjNVL3JSOFI2N0dwblhsVjVUMnhFWFdyYkFpeGor?= =?utf-8?B?RWJTbGxRRzdFVmJtekFER3hDd0hMU2RKOXZrV3owT0d5RmlSQTk0Qk9wYUZK?= =?utf-8?B?ZFlYMHBQQUpOeElwSm1CTXZySUhDcEM1dDRRVWp2ZVNjN0dsS0RvcWs3Wm43?= =?utf-8?B?aTJKRVkrbnp1aklsbnVzTEYwV1I2aFRIbmYvaEhINE9MM096UGhzMlFLSkRr?= =?utf-8?B?R0ZEeDlLOUFyVUFmV3Z6L0xVN3ZwcURjckcwRDVoaTYxNG5HZ0FIMW96djJD?= =?utf-8?B?azUxWi9UdjkxTHNwSjk0ekd4UEIwVVpMYzVTZ1BXLy9LMWkvdkUyQlFacSti?= =?utf-8?B?VGdJQWlSR21BdXIwc2w5WlZhWEVVVXc2M2VjTkZvSXl3M1pnaHZlWU5ycWZR?= =?utf-8?B?OVR1LzlldG40NFJDa0JTYTFieGVreTVzK1EwT3NIOXZEamRuTkR6bUNadkxL?= =?utf-8?B?TVFiY2hkSS9nNjFUR1VkdkdISTBFOHJldHArZVk3N1NwT1ZVbzZOemhDWEpi?= =?utf-8?B?MlJwaDlRZGFrUnJYSTNuZ2RKQmZtOE1DWVorQkFyLzhFUnhjekJkbG5FWVV5?= =?utf-8?B?bjFVbVRXVlpZVXVxSTh4TnVWRS9vQ0lpSCtBemljdEl3WjhGMXErTlc3N0ww?= =?utf-8?B?U0tMbnQxMGJtM3oxRnFLYkRNWGZ5VHNIU2pKbHdFeVRnbGQxL1ovNDd1U2tF?= =?utf-8?B?dDg3NEQvd3RLNFE5QnlKb2w5VEorMUs1WjljK0YrRExjeG9rRkhiVWFiMk9a?= =?utf-8?B?dHhDU2gybGRDQXpyU0I5TEF6ZTFuQ0RnazFMOE9va1VGT1JINEliVU52VmlI?= =?utf-8?B?UUtjMkt1SndMbktYQkdEeVJjampoQzVrZ3hhVXRKMlE2R1Z5ZjhqbzJBa3B5?= =?utf-8?B?NXBtMEpXZmc4RUFjeXRuYWxWZkNWZHVlQS9XTUtvVkRybGRIbW41bU1RY3Mw?= =?utf-8?B?OVhEaC9CK0RhWGdXWloyZHpwTTBoZUVseTZuQm51eFJQRmlaR0EycUxHa1pB?= =?utf-8?B?enUvT0o5OFIvalJ5MEpjSytZRjdJQlpMMlBNSUUxcEUwZVNicm5SOU5Idllz?= =?utf-8?B?QjJpZ0JEcm5OL25jMWlsZlBDbCtBWTVMZ0VyQlNobzJ2UkdGYmZiQzVWaFBY?= =?utf-8?Q?U7MtYriFtSe4LP6JFStx1LdlA2JCU8GbbNnzltquYKoHY?= X-MS-Exchange-AntiSpam-MessageData-1: ktlEZZaCVWVZdg== X-OriginatorOrg: Nvidia.com X-MS-Exchange-CrossTenant-Network-Message-Id: cc13365d-e64f-49c0-c384-08debd54ad9a X-MS-Exchange-CrossTenant-AuthSource: CH2PR12MB3990.namprd12.prod.outlook.com X-MS-Exchange-CrossTenant-AuthAs: Internal X-MS-Exchange-CrossTenant-OriginalArrivalTime: 29 May 2026 07:34:12.2496 (UTC) X-MS-Exchange-CrossTenant-FromEntityHeader: Hosted X-MS-Exchange-CrossTenant-Id: 43083d15-7273-40c1-b7db-39efd9ccc17a X-MS-Exchange-CrossTenant-MailboxType: HOSTED X-MS-Exchange-CrossTenant-UserPrincipalName: Tuolhc44f7LfS2S8ApftnhAv31JFJYlCvXZuEjGMzsnBfI60+tFk5XiVeOhJ4mVyW6ovgccczHuJryixclfxkg== X-MS-Exchange-Transport-CrossTenantHeadersStamped: MN2PR12MB4237 When probing the driver, the FWSEC-FRTS firmware creates a WPR2 secure memory region to store the GSP firmware, and the Booter Loader loads and starts that firmware into the GSP, making it run in RISC-V mode. These operations need to be reverted upon unloading, particularly the WPR2 secure region creation, as its presence prevents the driver from subsequently probing. Thus, prepare the Booter Unloader and FWSEC-SB firmware images when booting the GSP, so they can be executed at unbind time to put the GPU into a state where it can be probed again. Reviewed-by: Eliot Courtney Co-developed-by: Eliot Courtney Signed-off-by: Eliot Courtney Signed-off-by: Alexandre Courbot Reviewed-by: Danilo Krummrich --- drivers/gpu/nova-core/firmware/booter.rs | 1 - drivers/gpu/nova-core/firmware/fwsec.rs | 1 - drivers/gpu/nova-core/gpu.rs | 15 +++- drivers/gpu/nova-core/gsp.rs | 3 + drivers/gpu/nova-core/gsp/boot.rs | 38 +++++++-- drivers/gpu/nova-core/gsp/hal.rs | 21 ++++- drivers/gpu/nova-core/gsp/hal/gh100.rs | 2 +- drivers/gpu/nova-core/gsp/hal/tu102.rs | 142 +++++++++++++++++++++++++++= +++- drivers/gpu/nova-core/regs.rs | 5 ++ 9 files changed, 209 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-co= re/firmware/booter.rs index e45e5dc8d5d2..c5e17605e1a3 100644 --- a/drivers/gpu/nova-core/firmware/booter.rs +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -282,7 +282,6 @@ fn new_booter(data: &[u8]) -> Result { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) enum BooterKind { Loader, - #[expect(unused)] Unloader, } =20 diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-cor= e/firmware/fwsec.rs index 8810cb49db67..4108f28cd338 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -144,7 +144,6 @@ pub(crate) enum FwsecCommand { /// image into it. Frts { frts_addr: u64, frts_size: u64 }, /// Asks [`FwsecFirmware`] to load pre-OS apps on the PMU. - #[expect(dead_code)] Sb, } =20 diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 011d504830e4..aed992488db3 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -18,7 +18,10 @@ Falcon, // }, fb::SysmemFlush, - gsp::Gsp, + gsp::{ + self, + Gsp, // + }, regs, }; =20 @@ -260,6 +263,8 @@ pub(crate) struct Gpu<'gpu> { /// GSP runtime data. Temporarily an empty placeholder. #[pin] gsp: Gsp, + /// GSP unload firmware bundle, if any. + unload_bundle: Option, } =20 impl<'gpu> Gpu<'gpu> { @@ -293,7 +298,10 @@ pub(crate) fn new( =20 gsp <- Gsp::new(pdev), =20 - _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon= )? }, + // This member must be initialized last, so the `UnloadBundle`= can never be dropped from + // outside of the constructed `Gpu`, ensuring that the unload = sequence is properly run + // in case of failure. + unload_bundle: gsp.boot(pdev, bar, spec.chipset, gsp_falcon, s= ec2_falcon)?, }) } } @@ -304,12 +312,13 @@ fn drop(self: Pin<&mut Self>) { let this =3D self.project(); let device =3D *this.device; let bar =3D *this.bar; + let bundle =3D this.unload_bundle.take(); =20 let _ =3D this .gsp .as_ref() .get_ref() - .unload(device, bar, &*this.gsp_falcon) + .unload(device, bar, &*this.gsp_falcon, &*this.sec2_falcon, bu= ndle) .inspect_err(|e| dev_err!(device, "failed to unload GSP: {:?}\= n", e)); } } diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 38378f104068..1885cfa5cb38 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -185,3 +185,6 @@ pub(crate) fn new(pdev: &pci::Device) ->= impl PinInit); diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/= boot.rs index adc66809ce83..8d6fcc35b653 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -38,7 +38,8 @@ impl super::Gsp { /// user-space, patching them with signatures, and building firmware-s= pecific intricate data /// structures that the GSP will use at runtime. /// - /// Upon return, the GSP is up and running, and its runtime object giv= en as return value. + /// Upon return, the GSP is up and running, and its unload bundle (to = be given as argument to + /// [`Self::unload`]) returned. pub(crate) fn boot( self: Pin<&mut Self>, pdev: &pci::Device, @@ -46,7 +47,7 @@ pub(crate) fn boot( chipset: Chipset, gsp_falcon: &Falcon, sec2_falcon: &Falcon, - ) -> Result { + ) -> Result> { let dev =3D pdev.as_ref(); let hal =3D super::hal::gsp_hal(chipset); =20 @@ -57,8 +58,8 @@ pub(crate) fn boot( =20 let wpr_meta =3D Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new= (&gsp_fw, &fb_layout))?; =20 - // Perform the chipset-specific boot sequence. - hal.boot( + // Perform the chipset-specific boot sequence, and retrieve the un= load bundle. + let unload_bundle =3D hal.boot( &self, dev, bar, @@ -98,7 +99,7 @@ pub(crate) fn boot( Err(e) =3D> dev_warn!(pdev, "GPU name unavailable: {:?}\n", e), } =20 - Ok(()) + Ok(unload_bundle) } =20 /// Shut down the GSP and wait until it is offline. @@ -130,16 +131,35 @@ pub(crate) fn unload( dev: &device::Device, bar: &Bar0, gsp_falcon: &Falcon, + sec2_falcon: &Falcon, + unload_bundle: Option, ) -> Result { - // Shut down the GSP. - Self::shutdown_gsp( + // Shut down the GSP. Keep going even in case of error. + let mut res =3D Self::shutdown_gsp( &self.cmdq, bar, gsp_falcon, commands::PowerStateLevel::Level0, ) - .inspect_err(|e| dev_err!(dev, "Unload guest driver failed: {:?}\n= ", e))?; + .inspect_err(|e| dev_err!(dev, "GSP shutdown failed: {:?}\n", e)); =20 - Ok(()) + // Run the unload bundle to reset the GSP so it can be booted agai= n. + if let Some(unload_bundle) =3D unload_bundle { + res =3D res.and( + unload_bundle + .0 + .run(dev, bar, gsp_falcon, sec2_falcon) + .inspect_err(|e| dev_err!(dev, "Unload bundle failed: = {:?}\n", e)), + ); + } else { + dev_warn!( + dev, + "Unload bundle is missing, GSP won't be properly reset.\n" + ); + + res =3D Err(EAGAIN); + } + + res.inspect(|()| dev_info!(dev, "GSP successfully unloaded\n")) } } diff --git a/drivers/gpu/nova-core/gsp/hal.rs b/drivers/gpu/nova-core/gsp/h= al.rs index fb3edaeb3160..501b852dcb29 100644 --- a/drivers/gpu/nova-core/gsp/hal.rs +++ b/drivers/gpu/nova-core/gsp/hal.rs @@ -30,9 +30,28 @@ }, }; =20 +/// Trait for types containing the resources and code required to fully re= set the GSP. +/// +/// The GSP unload code might run in a situation where we cannot load firm= ware dynamically (e.g. +/// because we are in shutdown and the file system is not accessible anymo= re). Thus, the firmware +/// required for unloading is prepared at load time, and stored here until= it needs to be run. +pub(super) trait UnloadBundle: Send { + /// Performs the steps required to properly reset the GSP after it has= been stopped. + fn run( + &self, + dev: &device::Device, + bar: &Bar0, + gsp_falcon: &Falcon, + sec2_falcon: &Falcon, + ) -> Result; +} + /// Trait implemented by GSP HALs. pub(super) trait GspHal: Send { /// Performs the GSP boot process, loading and running the required fi= rmwares as needed. + /// + /// Upon success, returns the [`UnloadBundle`] to be run (if any) in o= rder to properly reset the + /// GSP after it has been stopped. #[allow(clippy::too_many_arguments)] fn boot( &self, @@ -44,7 +63,7 @@ fn boot( wpr_meta: &Coherent, gsp_falcon: &Falcon, sec2_falcon: &Falcon, - ) -> Result; + ) -> Result>; =20 /// Performs HAL-specific post-GSP boot tasks. /// diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core= /gsp/hal/gh100.rs index 3f3675f9c16a..0a8b7f763883 100644 --- a/drivers/gpu/nova-core/gsp/hal/gh100.rs +++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs @@ -41,7 +41,7 @@ fn boot( _wpr_meta: &Coherent, _gsp_falcon: &Falcon, _sec2_falcon: &Falcon, - ) -> Result { + ) -> Result> { Err(ENOTSUPP) } } diff --git a/drivers/gpu/nova-core/gsp/hal/tu102.rs b/drivers/gpu/nova-core= /gsp/hal/tu102.rs index a6f2b2e279e8..c4ab081f25c4 100644 --- a/drivers/gpu/nova-core/gsp/hal/tu102.rs +++ b/drivers/gpu/nova-core/gsp/hal/tu102.rs @@ -32,7 +32,10 @@ }, gpu::Chipset, gsp::{ - hal::GspHal, + hal::{ + GspHal, + UnloadBundle, // + }, sequencer::{ GspSequencer, GspSequencerParams, // @@ -44,6 +47,124 @@ vbios::Vbios, // }; =20 +// A ready-to-run FWSEC unload firmware. +// +// Since there are two variants of the prepared firmware (with and without= a bootloader), this type +// abstracts the difference. +enum FwsecUnloadFirmware { + WithoutBl(FwsecFirmware), + WithBl(FwsecFirmwareWithBl), +} + +impl FwsecUnloadFirmware { + /// Loads the FWSEC SB firmware, as well as its bootloader if `chipset= ` requires it. + fn new( + dev: &device::Device, + bar: &Bar0, + chipset: Chipset, + bios: &Vbios, + gsp_falcon: &Falcon, + ) -> Result { + let fwsec_sb =3D FwsecFirmware::new(dev, gsp_falcon, bar, bios, Fw= secCommand::Sb)?; + + Ok(if chipset.needs_fwsec_bootloader() { + Self::WithBl(FwsecFirmwareWithBl::new(fwsec_sb, dev, chipset)?) + } else { + Self::WithoutBl(fwsec_sb) + }) + } + + /// Runs the FWSEC SB firmware. + fn run( + &self, + dev: &device::Device, + bar: &Bar0, + gsp_falcon: &Falcon, + ) -> Result<()> { + match self { + Self::WithoutBl(fw) =3D> fw.run(dev, gsp_falcon, bar), + Self::WithBl(fw) =3D> fw.run(dev, gsp_falcon, bar), + } + } +} + +// Contains the firmware required to fully reset GSP on chipsets where the= GSP is started using +// FWSEC/Booter. +struct Sec2UnloadBundle { + fwsec_sb: FwsecUnloadFirmware, + booter_unloader: BooterFirmware, +} + +impl Sec2UnloadBundle { + /// Load and prepare the resources required to properly reset the GSP = after it has been stopped. + fn build( + dev: &device::Device, + bar: &Bar0, + chipset: Chipset, + bios: &Vbios, + gsp_falcon: &Falcon, + sec2_falcon: &Falcon, + ) -> Result> { + KBox::new( + Self { + fwsec_sb: FwsecUnloadFirmware::new(dev, bar, chipset, bios= , gsp_falcon)?, + booter_unloader: BooterFirmware::new( + dev, + BooterKind::Unloader, + chipset, + FIRMWARE_VERSION, + sec2_falcon, + bar, + )?, + }, + GFP_KERNEL, + ) + .map(|b| b as KBox) + .map_err(Into::into) + } +} + +impl UnloadBundle for Sec2UnloadBundle { + fn run( + &self, + dev: &device::Device, + bar: &Bar0, + gsp_falcon: &Falcon, + sec2_falcon: &Falcon, + ) -> Result<()> { + // Run FWSEC-SB to reset the GSP falcon to its pre-libos state. + self.fwsec_sb.run(dev, bar, gsp_falcon)?; + + // Remove WPR2 region if set. + let wpr2_hi =3D bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI); + if wpr2_hi.is_wpr2_set() { + sec2_falcon.reset(bar)?; + sec2_falcon.load(dev, bar, &self.booter_unloader)?; + + // Sentinel value to confirm that Booter Unloader has run. + const MAILBOX_SENTINEL: u32 =3D 0xff; + let (mbox0, _) =3D + sec2_falcon.boot(bar, Some(MAILBOX_SENTINEL), Some(MAILBOX= _SENTINEL))?; + if mbox0 !=3D 0 { + dev_err!(dev, "Booter Unloader returned error 0x{:x}\n", m= box0); + return Err(EINVAL); + } + + // Confirm that the WPR2 region has been removed. + let wpr2_hi =3D bar.read(regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI); + if wpr2_hi.is_wpr2_set() { + dev_err!( + dev, + "WPR2 region still set after Booter Unloader returned\= n" + ); + return Err(EBUSY); + } + } + + Ok(()) + } +} + /// Helper function to load and run the FWSEC-FRTS firmware and confirm th= at it has properly /// created the WPR2 region. fn run_fwsec_frts( @@ -143,9 +264,24 @@ fn boot( wpr_meta: &Coherent, gsp_falcon: &Falcon, sec2_falcon: &Falcon, - ) -> Result { + ) -> Result> { let bios =3D Vbios::new(dev, bar)?; =20 + // Try and prepare the unload bundle. If this fails, the GPU will = need to be reset + // before the driver can be probed again. + let unload_bundle =3D + Sec2UnloadBundle::build(dev, bar, chipset, &bios, gsp_falcon, = sec2_falcon) + .inspect_err(|e| { + dev_warn!(dev, "Failed to prepare unload firmware: {:?= }\n", e); + dev_warn!(dev, "The GSP won't be able to unload proper= ly on unbind.\n"); + dev_warn!( + dev, + "The GPU will need to be reset before the driver c= an bind again.\n" + ); + }) + .map(crate::gsp::UnloadBundle) + .ok(); + // FWSEC-FRTS is not executed on chips where the FRTS region size = is 0 (e.g. GA100). if !fb_layout.frts.is_empty() { run_fwsec_frts(dev, chipset, gsp_falcon, bar, &bios, fb_layout= )?; @@ -175,7 +311,7 @@ fn boot( )? .run(dev, bar, sec2_falcon, wpr_meta)?; =20 - Ok(()) + Ok(unload_bundle) } =20 fn post_boot( diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 6faeed73901d..356fbf364ea5 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -175,6 +175,11 @@ impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { pub(crate) fn higher_bound(self) -> u64 { u64::from(self.hi_val()) << 12 } + + /// Returns whether the WPR2 region is currently set. + pub(crate) fn is_wpr2_set(self) -> bool { + self.hi_val() !=3D 0 + } } =20 // PGSP --=20 2.54.0 From nobody Mon Jun 8 12:12:36 2026 Received: from CO1PR03CU002.outbound.protection.outlook.com (mail-westus2azon11010013.outbound.protection.outlook.com [52.101.46.13]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1D1EC3AFCF3; Fri, 29 May 2026 07:34:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=fail smtp.client-ip=52.101.46.13 ARC-Seal: i=2; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1780040065; cv=fail; b=bKC1yp+/M/iZ3R5Lm9NHzq53myIt3dhi6G+Sf9Xsp6njugxgFsXO4pvQCyPyzmQRePd5href67tIGP9cuOlby88o0NagijRYz2Q01R4ZO2l2VaZDUbv8IQCKi8epIOyCwyXIOAMQdStv98VOSz0BXiWldJCdzTRDTLnZtpjD+ao= ARC-Message-Signature: i=2; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1780040065; c=relaxed/simple; bh=+Gfs5S5TX+G/BYtAIPWzyhNZDT3RTk7/ihUisizpGuo=; h=From:Date:Subject:Content-Type:Message-Id:References:In-Reply-To: To:Cc:MIME-Version; b=lQ2e3YGh1fK5JFBCeIRF+ui8oSrOdadJEGpQdFNRdCwCcocFlbaiQAhejDnYkeH61ziADN/cGDDw7veTKfNrNuvtqRPZBq+A0ztbNop/F3Z3Kvj+G1RieyS5Zs0yejcaKJb7/BRwODTMRw8GEpNP+DWceF3dCkleWXgUwWCCsEE= ARC-Authentication-Results: i=2; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=nvidia.com; spf=fail smtp.mailfrom=nvidia.com; dkim=pass (2048-bit key) header.d=Nvidia.com header.i=@Nvidia.com header.b=Dq8AEDgv; arc=fail smtp.client-ip=52.101.46.13 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=nvidia.com Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=nvidia.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=Nvidia.com header.i=@Nvidia.com header.b="Dq8AEDgv" ARC-Seal: i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none; b=Lwo0p1IEu8z2HMMxrPWQpKEkCRxMademV0NQBYY0x0bmqyT/hAjhlKMRrFfC2k+V2YW3cHFuZu1EzhmFpcB9W4BmdYVSiqwaFVMIDn2SU5ybD8gP4RKEMtBoJoN2IugHJl2n+7Gd/7OSI9SG5s5Q2nLTNfaI2XCGw27XHDy5MDWpEdrL+vlSm6mMf8cC/ImAEgapyD4UUBJYzJtA4h0Mts0kaRYemeBFQdEnD4r+zbClBISbFwWRJ/5BvHYrbfApyuukUmIGoKXhNcTqgFloD6d3rVyGoFQB+GjADcUpmg6BsIa/qrMUas88Mcogledgoz5xFROx6z/AtqES+kBPvQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector10001; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=A3gHGI4pAwsSRBxToqmdEb0B7KkEXnp5IhM3GRi4ivE=; b=Os6eXNt1Jkk+a55IdHmHPnBYsrw3GHn8bBtvJl4QaXXIcNzlb7UrkcoeFJK4SFtOrWfQBP9EvWyCWo4qM30wntO4Vfxkd7mJZg78xoynEci3+AdPnPZIpy8Y8EkCP85jJ3w7kEiP5HeYXjmd5sfxnh2yAq+tYdqwSPaCAMyLDWhaMUSS8t5GtfasjvW3OwD48G6m/yr2sQNbi4mIZS3MZcBAE86p7s7FDBlhHjUL260z3mZvs/GT883/YVmoH7Yz8wiX6xB41HTF5nX7N9PnqU9IOjllEXozfCQiqv3p+Qljdgm6Ea5FD8B07BTLTqu9Hky10yG27O/x5k4GybCpvg== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=nvidia.com; dmarc=pass action=none header.from=nvidia.com; dkim=pass header.d=nvidia.com; arc=none DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=Nvidia.com; s=selector2; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=A3gHGI4pAwsSRBxToqmdEb0B7KkEXnp5IhM3GRi4ivE=; b=Dq8AEDgvFtZRG49FALDIxM0HyB6CgvyHpv3+o06McEOBN53q1JRZ8uU9+0hpAtCrJ6AC5sARHhFUh5YQQsDr7xYh6RnRKHQ0cKBs9Q3CKVdgmvpN+lm6k4Ev9bQgoajuNTWjJi3xsxXTbu+RtjCVgq18T8e70KDL1scvG1+MQkGP0k6ah+bX8JdfqvPtxVKKxwSHc4agPM27LSSH7wB77RC4HsVKo3/5hH5RN/Sv05dqREBe7S7ijayNUKeNdWM0Mjpj/1LvUOLdZGtyF4uJrmQh1RSu7n3ls3SRKQT1f+UTFE0prfoj5nKXz9EsvormEqTpma5UUAyoija/k8NOyg== Authentication-Results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=nvidia.com; Received: from CH2PR12MB3990.namprd12.prod.outlook.com (2603:10b6:610:28::18) by MN2PR12MB4237.namprd12.prod.outlook.com (2603:10b6:208:1d6::7) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.21.48.19; Fri, 29 May 2026 07:34:16 +0000 Received: from CH2PR12MB3990.namprd12.prod.outlook.com ([fe80::7de1:4fe5:8ead:5989]) by CH2PR12MB3990.namprd12.prod.outlook.com ([fe80::7de1:4fe5:8ead:5989%4]) with mapi id 15.21.0071.010; Fri, 29 May 2026 07:34:16 +0000 From: Alexandre Courbot Date: Fri, 29 May 2026 16:33:44 +0900 Subject: [PATCH v7 4/4] gpu: nova-core: gsp: run the unload bundle if Gsp::boot() fails Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260529-nova-unload-v7-4-678f39209e00@nvidia.com> References: <20260529-nova-unload-v7-0-678f39209e00@nvidia.com> In-Reply-To: <20260529-nova-unload-v7-0-678f39209e00@nvidia.com> To: Danilo Krummrich , Alice Ryhl , David Airlie , Simona Vetter Cc: John Hubbard , Alistair Popple , Timur Tabi , Eliot Courtney , nova-gpu@lists.linux.dev, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org, Alexandre Courbot X-Mailer: b4 0.15.2 X-ClientProxiedBy: TYCP286CA0157.JPNP286.PROD.OUTLOOK.COM (2603:1096:400:383::12) To CH2PR12MB3990.namprd12.prod.outlook.com (2603:10b6:610:28::18) Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-MS-PublicTrafficType: Email X-MS-TrafficTypeDiagnostic: CH2PR12MB3990:EE_|MN2PR12MB4237:EE_ X-MS-Office365-Filtering-Correlation-Id: 0832ac4b-78de-4b4d-469c-08debd54afd1 X-MS-Exchange-SenderADCheck: 1 X-MS-Exchange-AntiSpam-Relay: 0 X-Microsoft-Antispam: BCL:0;ARA:13230040|366016|1800799024|376014|10070799003|18002099003|22082099003|11063799006|56012099006|6133799003; X-Microsoft-Antispam-Message-Info: KdrebAVxLbOcQgXmzO8yzKta4cbYG6pbj+4tounpDTFSYOF7gvY4fwZqJ3jK0n+/4ry2WPGTlKYreBR/udP/O6zys5Lg07ImX4KgYTDmUgFEqk7jBPzSiqe6uMM/EbFGxsFC9GvtKX3Iz4EcVZ7/WFq7nXkdJFro27/O7QN+KfufacV4x739t5Y+m5PC0XL4pqn3IEnqtYhj3qTN46SU6SQn/KvGftGsDHT6tdBgYpb9jECn2xy1VgU199gFSJXBE/EqdMeqxbu8fzy4i59Siu35YgD9o/jCihD4kk+aKq187hF0dPHEoW8LLa2GQsGjmXJwMBI9dnka3suT4RALV+PXrPrJ/ulkxHKhKdw4uHTccYBHvdPY+Zs0lxska0FpOpg4DeJlaFFcBbtsFlcjgUWFRGGw+I498FftSvA91LNASMJx6/IbSnv3cryVCvu1VP9ZT5R6KkFgvRgXPgKWjYLQ3vuaL4yijpillT0NekUBEuoWccBjVwteQ7Ri7vdqt/xAS+Q0kyt6oTLufZ52C+QHCKZEuZZ064+qushvSQuXlPxvBl8xH2sbCutW6gMoefmw/30/Y5t708LtZjMoVqY3eIqa3KaRUq+Sun7xGCK+WVg89v+369R0psxmGoIuMi5iYI4U+SQ/RfK4wzn8xnO+P8kLAxSBaBCJ/a5YeMiBAmS1pW45RG5YwWjUdr7F X-Forefront-Antispam-Report: CIP:255.255.255.255;CTRY:;LANG:en;SCL:1;SRV:;IPV:NLI;SFV:NSPM;H:CH2PR12MB3990.namprd12.prod.outlook.com;PTR:;CAT:NONE;SFS:(13230040)(366016)(1800799024)(376014)(10070799003)(18002099003)(22082099003)(11063799006)(56012099006)(6133799003);DIR:OUT;SFP:1101; X-MS-Exchange-AntiSpam-MessageData-ChunkCount: 2 X-MS-Exchange-AntiSpam-MessageData-0: =?utf-8?B?N2QxbTV6Vy82WVlja2J2ckFjWjlYOW4wOW5uUTYzbWgyM1RIMVVOOTRNQnVs?= =?utf-8?B?d3VzZUVKU1VGY2ZMenJ6YWpuRG93TTlwSTN5R1VDQlJhSGRZcE5SVU9UUWRu?= =?utf-8?B?cUtTL1pIUFdDMlZJaGdlQjd5WmRxajRXVE9vWTRBZDcwQXE1K2NBNjd3Z3RZ?= =?utf-8?B?UUd1TEk5eTVNZUxVdDBWd0JJQkk5Q1hOV2NRRXJRWHdkU3k3ZUhWUW56K1ZX?= =?utf-8?B?aEszenkrWjdSKzFxajlsenhzZ29pbmpOaXAzdVJ1dUlvR1N1UFNnYTNDd0pX?= =?utf-8?B?TkV3QU51YWlCTmwzZ1NSZHRUVllkYjFQaXplK2FPL1ZSMjNMemJJcDBSK0NF?= =?utf-8?B?MVBnVDlVVllPUEE5VnV1cENrT09BSUVCT1ViSGY4eGc0RjdpZHFGQnlhZjk1?= =?utf-8?B?RGt3TXdDdHlFcExKL1NmZmZFZTdsMm9ZOVlRajg4b0Z6Z3kvVjgzcDMvc2Y4?= =?utf-8?B?bFBabHpwRzVva0d2UXZzcjJ0bzVVOW1mK3IrUXI1bzNUbTduMFZXSWw3dDY5?= =?utf-8?B?MGJiVGJQU3BRMnpGZlhUUGM0V043WWErcjQxWk8rR0tKbnovQm5rTFZNU1VL?= =?utf-8?B?Rmk5emxMd1lvUElJZGNRc0hoZFpuTWhKZkZiQmVNVlBxdHV1QXEvZ3FGYWVy?= =?utf-8?B?Znd6elgzMlRBYThjMTVyRXBYdFJoZVNOM09oVmRhWTdBZ3hMT1ZQTDhyRk9n?= =?utf-8?B?aWpOakR4bzJ6MkRzZlBTRCtDNHIyUXZURG1pcmhRWTVGYjNGOThZL0tHV29L?= =?utf-8?B?WHY5dGRMNG83d21EY1FOZWNXa2xKcmhFRE0zNXNKTGlkVkluVE9MZmpQT01N?= =?utf-8?B?UTFNdXg2Z1l0c1FHWmdEQ0JjUUFRekdqY0FSbEs3OGZEWitYUDRxUHZoaHpv?= =?utf-8?B?V2RoOS9GTTkxaEhueFh5WHZCQkI4NmZ5dUROQU5PeTdCaGJoVTdXdlNkZ0Ux?= =?utf-8?B?UXIxdktUZmc0MGNMcHFOZjdPN1lwWVJIQjEzUTV2aVBtMFNRZjh1UkwzVEdC?= =?utf-8?B?MVgvNU9ZVjAxejVOSEdpZUxEL0ZqK0lybHpjdUNmb3FQdU1SZktSNE9zZC9z?= =?utf-8?B?Z0g2ekdCU05QNVVYUU9XYkpFbW84Tm5SNFBiTms2UXNIK3d5Y1RHRExnRm04?= =?utf-8?B?ZjBZallUQ3YybGxzQ0pvMjVabDNkN2lBUFFSeDlRVFRnMW9uNzhjVWR6cFFF?= =?utf-8?B?SGJHaE1rMkVLN0pNN1VrdFRBN2ZWdWJSS3BWK0Ezck5RVzFvVXVqMm5wd3Va?= =?utf-8?B?bnZTMThCaDM5NnpSTzR4T1IzbHdJeTJnZVJQU3NKVkk4ZHpNbTl6bFU1WHZ1?= =?utf-8?B?ZStwOUtsc3kwY0Q0UmVPckVFS0ZZYTRMUjVFOE9UMXpYcmNacklNNDh0Q2Y5?= =?utf-8?B?T25sN3g0QkRwM0tUZm16WWFCaStOOW9WS0tXak8yL1ZCUjArWGJIS2llYUhF?= =?utf-8?B?RmtyWmNQbHBaK1kxQlVaUjM3cVlhdEoyVS9iSWNqT1FZcWphYnIxRFM1N2lR?= =?utf-8?B?ODF3aEd2OFlwK3lDS1RBbDVmQVhqb2tjNXlnMG1xVVNBQ0hVZzZkVjIveWlY?= =?utf-8?B?VWgyQi9VeU5KMlptZWZoRVNsTDNja1MrRmxSZzBvUTFjaEJkd2p4OFhOdnYw?= =?utf-8?B?djdpZWFPUWFadTA3UmdwWkFSQStMbDJ3ZHBBdkxYS1hpVnd6aExYWmpKSlRx?= =?utf-8?B?Y3hzY3ZrRU5NVS9GTkNMVlkxL2NLV0pBbkc3Y1E5ZzhuM2wzQXRZZzZKWk1q?= =?utf-8?B?c0JHRG9tQzdEc3JUYmtDUXRzeWpubEgyeWVza0xiZGtzVFFYVDdCZVVGc1Jh?= =?utf-8?B?MG5FUXJPeHBOMU1NNi9KZlBDMEJIR3lDN1BiZHE5YVI0VC8vbkZvVENCYVp1?= =?utf-8?B?cVNGS0lXWWZVenVtaDhtUVF6OGRHcENyU2pQVk13cE1UUWphZVVDZlUrQUpG?= =?utf-8?B?QXpIMExZeVNqSnFpOG1qMzUzajFKeG03OUw3UTk3dEtsVktRVGdrb1gyblg5?= =?utf-8?B?UzFZUTVVSnFmQkdVVlNDcFQzV0lXZDRaeXpxL0d0SG9yeWwwWEthLzBsL0hm?= =?utf-8?B?QWxsZ002dU9aSW9SNGozYjloYm1la1NMUFIxbzJTZ1haVWxXZm1ZdWtkVFdZ?= =?utf-8?B?OFRUM2dEdWVSaXh3aTA5dHRXMHphcVBoRmVyVTh1M3orK3N6OGxUWnBTYjlo?= =?utf-8?B?Q0lZd0pVQmE4dVdqZlNVODJkdmk0V0JLUXZaUU9vVlY4ZWFBd3E4b1FlUTRs?= =?utf-8?B?TmtWemQ0SE5VVTVuR01xekUyK2Q4YldObWRRSm1MRUM4RXVzK2dwb25KajV1?= =?utf-8?B?alQwakFKLzZFa1FHN2wyY1FXYlpjNlU4allzc21tVlZBdHRVZWN0YkhWQ2ZF?= =?utf-8?Q?NBZ7a06QZ48R7NTqx6Pp5VRfgCmTJqDu6TWsR2j2fzUws?= X-MS-Exchange-AntiSpam-MessageData-1: JG8+EvijQwVXtw== X-OriginatorOrg: Nvidia.com X-MS-Exchange-CrossTenant-Network-Message-Id: 0832ac4b-78de-4b4d-469c-08debd54afd1 X-MS-Exchange-CrossTenant-AuthSource: CH2PR12MB3990.namprd12.prod.outlook.com X-MS-Exchange-CrossTenant-AuthAs: Internal X-MS-Exchange-CrossTenant-OriginalArrivalTime: 29 May 2026 07:34:16.1576 (UTC) X-MS-Exchange-CrossTenant-FromEntityHeader: Hosted X-MS-Exchange-CrossTenant-Id: 43083d15-7273-40c1-b7db-39efd9ccc17a X-MS-Exchange-CrossTenant-MailboxType: HOSTED X-MS-Exchange-CrossTenant-UserPrincipalName: 1PoXZs6FMaBiOIOoUvpLoER7OPlfPOkE4/4WO4Gk4NB558SFVDIWsalKZbsJZRC8Pg2R35h8cKldtvV/UJc4jA== X-MS-Exchange-Transport-CrossTenantHeadersStamped: MN2PR12MB4237 If `Gsp::boot` fails, the GSP can be left in a state where boot cannot be attempted again unless it is reset first. To avoid this, we want to run the unload bundle whenever `boot` fails to try and clear the partially-initialized state. Do this by wrapping the unload bundle into a drop guard up until `boot` returns. After that, running the unload bundle becomes the responsibility of the caller. Signed-off-by: Alexandre Courbot Reviewed-by: Danilo Krummrich Reviewed-by: Eliot Courtney --- drivers/gpu/nova-core/gsp/boot.rs | 67 ++++++++++++++++++++++++++++++= ++-- drivers/gpu/nova-core/gsp/hal.rs | 19 +++++----- drivers/gpu/nova-core/gsp/hal/gh100.rs | 15 ++++---- drivers/gpu/nova-core/gsp/hal/tu102.rs | 31 ++++++++++------ 4 files changed, 101 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/= boot.rs index 8d6fcc35b653..1f83f63ceeb0 100644 --- a/drivers/gpu/nova-core/gsp/boot.rs +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -8,7 +8,8 @@ io::poll::read_poll_timeout, pci, prelude::*, - time::Delta, // + time::Delta, + types::ScopeGuard, // }; =20 use crate::{ @@ -31,6 +32,66 @@ }, }; =20 +/// Arguments required to call [`Gsp::unload`](super::Gsp::unload). +/// +/// Stored as their own type to avoid repeating a long and tedious list in= [`BootUnloadGuard`]. +pub(super) struct BootUnloadArgs<'a> { + gsp: &'a super::Gsp, + dev: &'a device::Device, + bar: &'a Bar0, + gsp_falcon: &'a Falcon, + sec2_falcon: &'a Falcon, + unload_bundle: Option, +} + +/// Guard that calls [`Gsp::unload`](super::Gsp::unload) with a +/// [`UnloadBundle`](super::UnloadBundle) when dropped. +/// +/// Used to ensure the `UnloadBundle` is run during failure paths. +pub(super) struct BootUnloadGuard<'a> { + guard: ScopeGuard, fn(BootUnloadArgs<'a>)>, +} + +impl<'a> BootUnloadGuard<'a> { + /// Wraps `unload_bundle` into a guard that executes it when dropped. + pub(super) fn new( + gsp: &'a super::Gsp, + dev: &'a device::Device, + bar: &'a Bar0, + gsp_falcon: &'a Falcon, + sec2_falcon: &'a Falcon, + unload_bundle: Option, + ) -> Self { + Self { + guard: ScopeGuard::new_with_data( + BootUnloadArgs { + gsp, + dev, + bar, + gsp_falcon, + sec2_falcon, + unload_bundle, + }, + |args| { + let _ =3D super::Gsp::unload( + args.gsp, + args.dev, + args.bar, + args.gsp_falcon, + args.sec2_falcon, + args.unload_bundle, + ); + }, + ), + } + } + + /// Disarms the guard and returns the [`UnloadBundle`](super::UnloadBu= ndle) it contains. + pub(super) fn dismiss(self) -> Option { + self.guard.dismiss().unload_bundle + } +} + impl super::Gsp { /// Attempt to boot the GSP. /// @@ -59,7 +120,7 @@ pub(crate) fn boot( let wpr_meta =3D Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new= (&gsp_fw, &fb_layout))?; =20 // Perform the chipset-specific boot sequence, and retrieve the un= load bundle. - let unload_bundle =3D hal.boot( + let unload_guard =3D hal.boot( &self, dev, bar, @@ -99,7 +160,7 @@ pub(crate) fn boot( Err(e) =3D> dev_warn!(pdev, "GPU name unavailable: {:?}\n", e), } =20 - Ok(unload_bundle) + Ok(unload_guard.dismiss()) } =20 /// Shut down the GSP and wait until it is offline. diff --git a/drivers/gpu/nova-core/gsp/hal.rs b/drivers/gpu/nova-core/gsp/h= al.rs index 501b852dcb29..88fc3e791114 100644 --- a/drivers/gpu/nova-core/gsp/hal.rs +++ b/drivers/gpu/nova-core/gsp/hal.rs @@ -25,6 +25,7 @@ Chipset, // }, gsp::{ + boot::BootUnloadGuard, Gsp, GspFwWprMeta, // }, @@ -50,20 +51,20 @@ fn run( pub(super) trait GspHal: Send { /// Performs the GSP boot process, loading and running the required fi= rmwares as needed. /// - /// Upon success, returns the [`UnloadBundle`] to be run (if any) in o= rder to properly reset the - /// GSP after it has been stopped. + /// Upon success, returns a guard that runs the GSP unload sequence if= GSP boot does not + /// complete. #[allow(clippy::too_many_arguments)] - fn boot( + fn boot<'a>( &self, - gsp: &Gsp, - dev: &device::Device, - bar: &Bar0, + gsp: &'a Gsp, + dev: &'a device::Device, + bar: &'a Bar0, chipset: Chipset, fb_layout: &FbLayout, wpr_meta: &Coherent, - gsp_falcon: &Falcon, - sec2_falcon: &Falcon, - ) -> Result>; + gsp_falcon: &'a Falcon, + sec2_falcon: &'a Falcon, + ) -> Result>; =20 /// Performs HAL-specific post-GSP boot tasks. /// diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core= /gsp/hal/gh100.rs index 0a8b7f763883..9a4bb22578b3 100644 --- a/drivers/gpu/nova-core/gsp/hal/gh100.rs +++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs @@ -18,6 +18,7 @@ fb::FbLayout, gpu::Chipset, gsp::{ + boot::BootUnloadGuard, hal::GspHal, Gsp, GspFwWprMeta, // @@ -31,17 +32,17 @@ impl GspHal for Gh100 { /// /// This path uses FSP to establish a chain of trust and boot GSP-FMC.= FSP handles /// the GSP boot internally - no manual GSP reset/boot is needed. - fn boot( + fn boot<'a>( &self, - _gsp: &Gsp, - _dev: &device::Device, - _bar: &Bar0, + _gsp: &'a Gsp, + _dev: &'a device::Device, + _bar: &'a Bar0, _chipset: Chipset, _fb_layout: &FbLayout, _wpr_meta: &Coherent, - _gsp_falcon: &Falcon, - _sec2_falcon: &Falcon, - ) -> Result> { + _gsp_falcon: &'a Falcon, + _sec2_falcon: &'a Falcon, + ) -> Result> { Err(ENOTSUPP) } } diff --git a/drivers/gpu/nova-core/gsp/hal/tu102.rs b/drivers/gpu/nova-core= /gsp/hal/tu102.rs index c4ab081f25c4..6a27e7e90279 100644 --- a/drivers/gpu/nova-core/gsp/hal/tu102.rs +++ b/drivers/gpu/nova-core/gsp/hal/tu102.rs @@ -32,6 +32,7 @@ }, gpu::Chipset, gsp::{ + boot::BootUnloadGuard, hal::{ GspHal, UnloadBundle, // @@ -254,21 +255,23 @@ fn run_fwsec_frts( struct Tu102; =20 impl GspHal for Tu102 { - fn boot( + fn boot<'a>( &self, - gsp: &Gsp, - dev: &device::Device, - bar: &Bar0, + gsp: &'a Gsp, + dev: &'a device::Device, + bar: &'a Bar0, chipset: Chipset, fb_layout: &FbLayout, wpr_meta: &Coherent, - gsp_falcon: &Falcon, - sec2_falcon: &Falcon, - ) -> Result> { + gsp_falcon: &'a Falcon, + sec2_falcon: &'a Falcon, + ) -> Result> { let bios =3D Vbios::new(dev, bar)?; =20 - // Try and prepare the unload bundle. If this fails, the GPU will = need to be reset - // before the driver can be probed again. + // Try and prepare the unload bundle. + // + // If the unload bundle creation fails, the GPU will need to be re= set before the driver can + // be probed again. let unload_bundle =3D Sec2UnloadBundle::build(dev, bar, chipset, &bios, gsp_falcon, = sec2_falcon) .inspect_err(|e| { @@ -279,8 +282,12 @@ fn boot( "The GPU will need to be reset before the driver c= an bind again.\n" ); }) - .map(crate::gsp::UnloadBundle) - .ok(); + .ok() + .map(crate::gsp::UnloadBundle); + + // Wrap the unload bundle into a drop guard so it is automatically= run upon failure. + let unload_guard =3D + BootUnloadGuard::new(gsp, dev, bar, gsp_falcon, sec2_falcon, u= nload_bundle); =20 // FWSEC-FRTS is not executed on chips where the FRTS region size = is 0 (e.g. GA100). if !fb_layout.frts.is_empty() { @@ -311,7 +318,7 @@ fn boot( )? .run(dev, bar, sec2_falcon, wpr_meta)?; =20 - Ok(unload_bundle) + Ok(unload_guard) } =20 fn post_boot( --=20 2.54.0