Splits large RPCs if necessary and sends the remaining parts using
continuation records. RPCs that do not need continuation records
continue to be written directly into the command buffer. RPCs that do
need them are first written into a staging buffer, which costs one
additional copy.
Continuation records on the receive path do not need to be supported at
the moment, because those replies do not need to be read and are
currently drained by retrying `receive_msg` on ERANGE.
Tested-by: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Eliot Courtney <ecourtney@nvidia.com>
---
drivers/gpu/nova-core/gsp/cmdq.rs | 43 +++++++++-
drivers/gpu/nova-core/gsp/commands.rs | 144 +++++++++++++++++++++++++++++++++-
drivers/gpu/nova-core/gsp/fw.rs | 4 +
3 files changed, 187 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 4a663a5b3437..d68b93ddf7cc 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -28,6 +28,10 @@
use crate::{
driver::Bar0,
gsp::{
+ commands::{
+ ContinuationRecord,
+ SplitState, //
+ },
fw::{
GspMsgElement,
MsgFunction,
@@ -452,7 +456,7 @@ struct GspMessage<'a> {
/// Computes the total size of the command (including its variable-length payload) without the
/// [`GspMsgElement`] header.
-fn command_size<M>(command: &M) -> usize
+pub(crate) fn command_size<M>(command: &M) -> usize
where
M: CommandToGsp,
{
@@ -520,7 +524,7 @@ fn notify_gsp(bar: &Bar0) {
.write(bar);
}
- /// Sends `command` to the GSP.
+ /// Sends `command` to the GSP, without splitting it.
///
/// # Errors
///
@@ -529,7 +533,7 @@ fn notify_gsp(bar: &Bar0) {
/// written to by its [`CommandToGsp::init_variable_payload`] method.
///
/// Error codes returned by the command initializers are propagated as-is.
- pub(crate) fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result
+ fn send_single_command<M>(&mut self, bar: &Bar0, command: M) -> Result
where
M: CommandToGsp,
// This allows all error types, including `Infallible`, to be used for `M::InitError`.
@@ -588,6 +592,39 @@ pub(crate) fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result
Ok(())
}
+ /// Sends `command` to the GSP.
+ ///
+ /// The command may be split into multiple messages if it is large.
+ ///
+ /// # Errors
+ ///
+ /// - `ETIMEDOUT` if space does not become available within the timeout.
+ /// - `EIO` if the variable payload requested by the command has not been entirely
+ /// written to by its [`CommandToGsp::init_variable_payload`] method.
+ ///
+ /// Error codes returned by the command initializers are propagated as-is.
+ pub(crate) fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result
+ where
+ M: CommandToGsp,
+ Error: From<M::InitError>,
+ {
+ let mut state = SplitState::new(&command)?;
+
+ self.send_single_command(bar, state.command(command))?;
+
+ while let Some(continuation) = state.next_continuation_record() {
+ dev_dbg!(
+ &self.dev,
+ "GSP RPC: send continuation: size=0x{:x}\n",
+ command_size(&continuation),
+ );
+ // Turbofish needed because the compiler cannot infer M here.
+ self.send_single_command::<ContinuationRecord<'_>>(bar, continuation)?;
+ }
+
+ Ok(())
+ }
+
/// Wait for a message to become available on the message queue.
///
/// This works purely at the transport layer and does not interpret or validate the message
diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
index 8f270eca33be..6ffd0b9cbf05 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -4,6 +4,7 @@
array,
convert::Infallible,
ffi::FromBytesUntilNulError,
+ marker::PhantomData,
str::Utf8Error, //
};
@@ -22,13 +23,16 @@
driver::Bar0,
gsp::{
cmdq::{
+ command_size,
Cmdq,
CommandToGsp,
MessageFromGsp, //
},
fw::{
commands::*,
- MsgFunction, //
+ GspMsgElement,
+ MsgFunction,
+ GSP_MSG_QUEUE_ELEMENT_SIZE_MAX, //
},
},
sbuffer::SBufferIter,
@@ -242,3 +246,141 @@ pub(crate) fn get_gsp_info(cmdq: &mut Cmdq, bar: &Bar0) -> Result<GetGspStaticIn
}
}
}
+
+/// The `ContinuationRecord` command.
+pub(crate) struct ContinuationRecord<'a> {
+ data: &'a [u8],
+}
+
+impl<'a> ContinuationRecord<'a> {
+ /// Creates a new `ContinuationRecord` command with the given data.
+ pub(crate) fn new(data: &'a [u8]) -> Self {
+ Self { data }
+ }
+}
+
+impl<'a> CommandToGsp for ContinuationRecord<'a> {
+ const FUNCTION: MsgFunction = MsgFunction::ContinuationRecord;
+ type Command = ();
+ type InitError = Infallible;
+
+ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
+ <()>::init_zeroed()
+ }
+
+ fn variable_payload_len(&self) -> usize {
+ self.data.len()
+ }
+
+ fn init_variable_payload(
+ &self,
+ dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
+ ) -> Result {
+ dst.write_all(self.data)
+ }
+}
+
+/// Wrapper that splits a command across continuation records if needed.
+pub(crate) struct SplitState<C: CommandToGsp> {
+ state: Option<(KVVec<u8>, usize)>,
+ _phantom: PhantomData<C>,
+}
+
+impl<C: CommandToGsp> SplitState<C> {
+ /// Maximum command size that fits in a single queue element.
+ const MAX_CMD_SIZE: usize = GSP_MSG_QUEUE_ELEMENT_SIZE_MAX - size_of::<GspMsgElement>();
+
+ /// Maximum size of the variable payload that can be sent in the main command.
+ const MAX_FIRST_PAYLOAD_SIZE: usize = Self::MAX_CMD_SIZE - size_of::<C::Command>();
+
+ /// Creates a new `SplitState` for the given command.
+ ///
+ /// If the command is too large, it will be split into a main command and some number of
+ /// continuation records.
+ pub(crate) fn new(inner: &C) -> Result<Self> {
+ if command_size(inner) > Self::MAX_CMD_SIZE {
+ let mut staging =
+ KVVec::<u8>::from_elem(0u8, inner.variable_payload_len(), GFP_KERNEL)?;
+ let mut sbuffer = SBufferIter::new_writer([staging.as_mut_slice(), &mut []]);
+ inner.init_variable_payload(&mut sbuffer)?;
+ if !sbuffer.is_empty() {
+ return Err(EIO);
+ }
+ drop(sbuffer);
+
+ Ok(Self {
+ state: Some((staging, Self::MAX_FIRST_PAYLOAD_SIZE)),
+ _phantom: PhantomData,
+ })
+ } else {
+ Ok(Self {
+ state: None,
+ _phantom: PhantomData,
+ })
+ }
+ }
+
+ /// Returns the main command.
+ pub(crate) fn command(&self, inner: C) -> SplitCommand<'_, C> {
+ if let Some((staging, _)) = &self.state {
+ SplitCommand::Split(inner, staging)
+ } else {
+ SplitCommand::Single(inner)
+ }
+ }
+
+ /// Returns the next continuation record, or `None` if there are no more.
+ pub(crate) fn next_continuation_record(&mut self) -> Option<ContinuationRecord<'_>> {
+ let (staging, offset) = self.state.as_mut()?;
+
+ let remaining = staging.len() - *offset;
+ if remaining > 0 {
+ let chunk_size = remaining.min(Self::MAX_CMD_SIZE);
+ let record = ContinuationRecord::new(&staging[*offset..(*offset + chunk_size)]);
+ *offset += chunk_size;
+ Some(record)
+ } else {
+ None
+ }
+ }
+}
+
+/// Wrapper enum that represents either a single command or a split command with its staging buffer.
+pub(crate) enum SplitCommand<'a, C: CommandToGsp> {
+ /// A command that fits in a single queue element.
+ Single(C),
+ /// A command split across continuation records, with its full payload in a staging buffer.
+ Split(C, &'a [u8]),
+}
+
+impl<'a, C: CommandToGsp> CommandToGsp for SplitCommand<'a, C> {
+ const FUNCTION: MsgFunction = C::FUNCTION;
+ type Command = C::Command;
+ type InitError = C::InitError;
+
+ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
+ match self {
+ SplitCommand::Single(cmd) => cmd.init(),
+ SplitCommand::Split(cmd, _) => cmd.init(),
+ }
+ }
+
+ fn variable_payload_len(&self) -> usize {
+ match self {
+ SplitCommand::Single(cmd) => cmd.variable_payload_len(),
+ SplitCommand::Split(_, _) => SplitState::<C>::MAX_FIRST_PAYLOAD_SIZE,
+ }
+ }
+
+ fn init_variable_payload(
+ &self,
+ dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
+ ) -> Result {
+ match self {
+ SplitCommand::Single(cmd) => cmd.init_variable_payload(dst),
+ SplitCommand::Split(_, staging) => {
+ dst.write_all(&staging[..self.variable_payload_len()])
+ }
+ }
+ }
+}
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index 6005362450cb..25fca1f6db2c 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -202,6 +202,7 @@ pub(crate) enum MsgFunction {
AllocObject = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT,
AllocRoot = bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT,
BindCtxDma = bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA,
+ ContinuationRecord = bindings::NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD,
Free = bindings::NV_VGPU_MSG_FUNCTION_FREE,
GetGspStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO,
GetStaticInfo = bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO,
@@ -239,6 +240,9 @@ fn try_from(value: u32) -> Result<MsgFunction> {
bindings::NV_VGPU_MSG_FUNCTION_ALLOC_OBJECT => Ok(MsgFunction::AllocObject),
bindings::NV_VGPU_MSG_FUNCTION_ALLOC_ROOT => Ok(MsgFunction::AllocRoot),
bindings::NV_VGPU_MSG_FUNCTION_BIND_CTX_DMA => Ok(MsgFunction::BindCtxDma),
+ bindings::NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD => {
+ Ok(MsgFunction::ContinuationRecord)
+ }
bindings::NV_VGPU_MSG_FUNCTION_FREE => Ok(MsgFunction::Free),
bindings::NV_VGPU_MSG_FUNCTION_GET_GSP_STATIC_INFO => Ok(MsgFunction::GetGspStaticInfo),
bindings::NV_VGPU_MSG_FUNCTION_GET_STATIC_INFO => Ok(MsgFunction::GetStaticInfo),
--
2.53.0
Hi Eliot,
On Thu Feb 26, 2026 at 8:45 PM JST, Eliot Courtney wrote:
<snip>
> @@ -588,6 +592,39 @@ pub(crate) fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result
> Ok(())
> }
>
> + /// Sends `command` to the GSP.
> + ///
> + /// The command may be split into multiple messages if it is large.
> + ///
> + /// # Errors
> + ///
> + /// - `ETIMEDOUT` if space does not become available within the timeout.
> + /// - `EIO` if the variable payload requested by the command has not been entirely
> + /// written to by its [`CommandToGsp::init_variable_payload`] method.
> + ///
> + /// Error codes returned by the command initializers are propagated as-is.
> + pub(crate) fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result
> + where
> + M: CommandToGsp,
> + Error: From<M::InitError>,
> + {
> + let mut state = SplitState::new(&command)?;
> +
> + self.send_single_command(bar, state.command(command))?;
As we discussed offline these two lines were bothering me a bit, because
we pass `command` twice and there is a possibility (although purely
hypothetical) of API misuse. I think I found a way around it: we turn
`SplitState` into an enum with a `Single` variant, that contains the
original command, and a `Split` one, which contains the truncated
initial command and its continuation records.
This simple change cascades into more simplifications in the new types
introduced by this patch: `SplitCommand` is not an enum anymore, but the
original command with a shorter payload. And the continuation records
are their own self-contained type. This is achieved by allocating two
`KVVec`s when we split, and taking advantage of the `SBuffer` we
conveniently have at hand to fill them both.
I have pushed a branch with this idea implemented on top of yours in [1]
- please check it, but I think it brings a nice (if small) incremental
improvement. It removes the API misuse potential, makes every type
purely single-purpose, and doesn't need `PhantomData` or lifetimes,
making it simpler overall IMHO.
[1] https://github.com/Gnurou/linux/tree/review/continuations
> +
> + while let Some(continuation) = state.next_continuation_record() {
> + dev_dbg!(
> + &self.dev,
> + "GSP RPC: send continuation: size=0x{:x}\n",
> + command_size(&continuation),
> + );
`send_single_command` should already print the command, so I don't think
we need this additional debug print?
> + // Turbofish needed because the compiler cannot infer M here.
> + self.send_single_command::<ContinuationRecord<'_>>(bar, continuation)?;
> + }
> +
> + Ok(())
> + }
> +
> /// Wait for a message to become available on the message queue.
> ///
> /// This works purely at the transport layer and does not interpret or validate the message
> diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
> index 8f270eca33be..6ffd0b9cbf05 100644
> --- a/drivers/gpu/nova-core/gsp/commands.rs
> +++ b/drivers/gpu/nova-core/gsp/commands.rs
> @@ -4,6 +4,7 @@
> array,
> convert::Infallible,
> ffi::FromBytesUntilNulError,
> + marker::PhantomData,
> str::Utf8Error, //
> };
>
> @@ -22,13 +23,16 @@
> driver::Bar0,
> gsp::{
> cmdq::{
> + command_size,
> Cmdq,
> CommandToGsp,
> MessageFromGsp, //
> },
> fw::{
> commands::*,
> - MsgFunction, //
> + GspMsgElement,
> + MsgFunction,
> + GSP_MSG_QUEUE_ELEMENT_SIZE_MAX, //
> },
> },
> sbuffer::SBufferIter,
> @@ -242,3 +246,141 @@ pub(crate) fn get_gsp_info(cmdq: &mut Cmdq, bar: &Bar0) -> Result<GetGspStaticIn
> }
> }
> }
> +
> +/// The `ContinuationRecord` command.
> +pub(crate) struct ContinuationRecord<'a> {
These new types are to be used by the command queue, and we don't want
to use them elsewhere, so `pub(super)` seems more appropriate to me.
And actually, since they are more command queue infrastructure than pure
"commands" that fulfill a given purpose, I'd suggest moving them into
their own sub-module of `gsp` (named `continuation` or `split` maybe?).
That way they don't get in the way of readers who just want to learn
about GSP commands.
On Fri Feb 27, 2026 at 8:28 PM JST, Alexandre Courbot wrote:
>> + /// Sends `command` to the GSP.
>> + ///
>> + /// The command may be split into multiple messages if it is large.
>> + ///
>> + /// # Errors
>> + ///
>> + /// - `ETIMEDOUT` if space does not become available within the timeout.
>> + /// - `EIO` if the variable payload requested by the command has not been entirely
>> + /// written to by its [`CommandToGsp::init_variable_payload`] method.
>> + ///
>> + /// Error codes returned by the command initializers are propagated as-is.
>> + pub(crate) fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result
>> + where
>> + M: CommandToGsp,
>> + Error: From<M::InitError>,
>> + {
>> + let mut state = SplitState::new(&command)?;
>> +
>> + self.send_single_command(bar, state.command(command))?;
>
> As we discussed offline these two lines were bothering me a bit, because
> we pass `command` twice and there is a possibility (although purely
> hypothetical) of API misuse. I think I found a way around it: we turn
> `SplitState` into an enum with a `Single` variant, that contains the
> original command, and a `Split` one, which contains the truncated
> initial command and its continuation records.
>
> This simple change cascades into more simplifications in the new types
> introduced by this patch: `SplitCommand` is not an enum anymore, but the
> original command with a shorter payload. And the continuation records
> are their own self-contained type. This is achieved by allocating two
> `KVVec`s when we split, and taking advantage of the `SBuffer` we
> conveniently have at hand to fill them both.
>
> I have pushed a branch with this idea implemented on top of yours in [1]
> - please check it, but I think it brings a nice (if small) incremental
> improvement. It removes the API misuse potential, makes every type
> purely single-purpose, and doesn't need `PhantomData` or lifetimes,
> making it simpler overall IMHO.
>
> [1] https://github.com/Gnurou/linux/tree/review/continuations
Thank you for looking into this and I agree this new design is better.
We pay an extra allocation but it's rare that that path will be hit, so
I agree it's worth it.
Added and thanks for your work on improving this API.
>> +
>> +/// The `ContinuationRecord` command.
>> +pub(crate) struct ContinuationRecord<'a> {
>
> These new types are to be used by the command queue, and we don't want
> to use them elsewhere, so `pub(super)` seems more appropriate to me.
>
> And actually, since they are more command queue infrastructure than pure
> "commands" that fulfill a given purpose, I'd suggest moving them into
> their own sub-module of `gsp` (named `continuation` or `split` maybe?).
> That way they don't get in the way of readers who just want to learn
> about GSP commands.
Done.
© 2016 - 2026 Red Hat, Inc.