Implment the io-uring abstractions needed for miscdevicecs and other
char devices that have io-uring command interface.
* `io_uring::IoUringCmd` : Rust abstraction for `io_uring_cmd` which
will be used as arg for `MiscDevice::uring_cmd()`. And driver can get
`cmd_op` sent from userspace. Also it has `flags` which includes option
that is reissued.
* `io_uring::IoUringSqe` : Rust abstraction for `io_uring_sqe` which
could be get from `IoUringCmd::sqe()` and driver could get `cmd_data`
from userspace. Also `IoUringSqe` has more data like opcode could be used in
driver.
Signed-off-by: Sidong Yang <sidong.yang@furiosa.ai>
---
rust/kernel/io_uring.rs | 306 ++++++++++++++++++++++++++++++++++++++++
rust/kernel/lib.rs | 1 +
2 files changed, 307 insertions(+)
create mode 100644 rust/kernel/io_uring.rs
diff --git a/rust/kernel/io_uring.rs b/rust/kernel/io_uring.rs
new file mode 100644
index 000000000000..61e88bdf4e42
--- /dev/null
+++ b/rust/kernel/io_uring.rs
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0
+// SPDX-FileCopyrightText: (C) 2025 Furiosa AI
+
+//! Abstractions for io-uring.
+//!
+//! This module provides types for implements io-uring interface for char device.
+//!
+//!
+//! C headers: [`include/linux/io_uring/cmd.h`](srctree/include/linux/io_uring/cmd.h) and
+//! [`include/linux/io_uring/io_uring.h`](srctree/include/linux/io_uring/io_uring.h)
+
+use core::{mem::MaybeUninit, pin::Pin};
+
+use crate::error::from_result;
+use crate::transmute::{AsBytes, FromBytes};
+use crate::{fs::File, types::Opaque};
+
+use crate::prelude::*;
+
+/// io-uring opcode
+pub mod opcode {
+ /// opcode for uring cmd
+ pub const URING_CMD: u32 = bindings::io_uring_op_IORING_OP_URING_CMD;
+}
+
+/// A Rust abstraction for the Linux kernel's `io_uring_cmd` structure.
+///
+/// This structure is a safe, opaque wrapper around the raw C `io_uring_cmd`
+/// binding from the Linux kernel. It represents a command structure used
+/// in io_uring operations within the kernel.
+/// This type is used internally by the io_uring subsystem to manage
+/// asynchronous I/O commands.
+///
+/// This type should not be constructed or manipulated directly by
+/// kernel module developers.
+///
+/// # INVARIANT
+/// - `self.inner` always points to a valid, live `bindings::io_uring_cmd`.
+#[repr(transparent)]
+pub struct IoUringCmd {
+ /// An opaque wrapper containing the actual `io_uring_cmd` data.
+ inner: Opaque<bindings::io_uring_cmd>,
+}
+
+impl IoUringCmd {
+ /// Returns the cmd_op with associated with the `io_uring_cmd`.
+ #[inline]
+ pub fn cmd_op(&self) -> u32 {
+ // SAFETY: `self.inner` is guaranteed by the type invariant to point
+ // to a live `io_uring_cmd`, so dereferencing is safe.
+ unsafe { (*self.inner.get()).cmd_op }
+ }
+
+ /// Returns the flags with associated with the `io_uring_cmd`.
+ #[inline]
+ pub fn flags(&self) -> u32 {
+ // SAFETY: `self.inner` is guaranteed by the type invariant to point
+ // to a live `io_uring_cmd`, so dereferencing is safe.
+ unsafe { (*self.inner.get()).flags }
+ }
+
+ /// Reads protocol data unit as `T` that impl `FromBytes` from uring cmd
+ ///
+ /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size.
+ #[inline]
+ pub fn read_pdu<T: FromBytes>(&self) -> Result<T> {
+ // SAFETY: `self.inner` is guaranteed by the type invariant to point
+ // to a live `io_uring_cmd`, so dereferencing is safe.
+ let inner = unsafe { &mut *self.inner.get() };
+
+ let len = size_of::<T>();
+ if len > inner.pdu.len() {
+ return Err(EFAULT);
+ }
+
+ let mut out: MaybeUninit<T> = MaybeUninit::uninit();
+ let ptr = &raw mut inner.pdu as *const c_void;
+
+ // SAFETY:
+ // * The `ptr` is valid pointer from `self.inner` that is guaranteed by type invariant.
+ // * The `out` is valid pointer that points `T` which impls `FromBytes` and checked
+ // size of `T` is smaller than pdu size.
+ unsafe {
+ core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::<c_void>(), len);
+ }
+
+ // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements
+ // `FromBytes`, any bit-pattern is a valid value for this type.
+ Ok(unsafe { out.assume_init() })
+ }
+
+ /// Writes the provided `value` to `pdu` in uring_cmd `self`
+ ///
+ /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size.
+ #[inline]
+ pub fn write_pdu<T: AsBytes>(&mut self, value: &T) -> Result<()> {
+ // SAFETY: `self.inner` is guaranteed by the type invariant to point
+ // to a live `io_uring_cmd`, so dereferencing is safe.
+ let inner = unsafe { &mut *self.inner.get() };
+
+ let len = size_of::<T>();
+ if len > inner.pdu.len() {
+ return Err(EFAULT);
+ }
+
+ let src = (value as *const T).cast::<c_void>();
+ let dst = &raw mut inner.pdu as *mut c_void;
+
+ // SAFETY:
+ // * The `src` is points valid memory that is guaranteed by `T` impls `AsBytes`
+ // * The `dst` is valid. It's from `self.inner` that is guaranteed by type invariant.
+ // * It's safe to copy because size of `T` is no more than len of pdu.
+ unsafe {
+ core::ptr::copy_nonoverlapping(src, dst, len);
+ }
+
+ Ok(())
+ }
+
+ /// Constructs a new [`IoUringCmd`] from a raw `io_uring_cmd`
+ ///
+ /// # Safety
+ ///
+ /// The caller must guarantee that:
+ /// - `ptr` is non-null, properly aligned, and points to a valid
+ /// `bindings::io_uring_cmd`.
+ /// - The pointed-to memory remains initialized and valid for the entire
+ /// lifetime `'a` of the returned reference.
+ /// - While the returned `Pin<&'a mut IoUringCmd>` is alive, the underlying
+ /// object is **not moved** (pinning requirement).
+ /// - **Aliasing rules:** the returned `&mut` has **exclusive** access to the same
+ /// object for its entire lifetime:
+ /// - No other `&mut` **or** `&` references to the same `io_uring_cmd` may be
+ /// alive at the same time.
+ /// - There must be no concurrent reads/writes through raw pointers, FFI, or
+ /// other kernel paths to the same object during this lifetime.
+ /// - If the object can be touched from other contexts (e.g. IRQ/another CPU),
+ /// the caller must provide synchronization to uphold this exclusivity.
+ /// - This function relies on `IoUringCmd` being `repr(transparent)` over
+ /// `bindings::io_uring_cmd` so the cast preserves layout.
+ #[inline]
+ pub unsafe fn from_raw<'a>(ptr: *mut bindings::io_uring_cmd) -> Pin<&'a mut IoUringCmd> {
+ // SAFETY:
+ // * The caller guarantees that the pointer is not dangling and stays
+ // valid for the duration of 'a.
+ // * The cast is okay because `IoUringCmd` is `repr(transparent)` and
+ // has the same memory layout as `bindings::io_uring_cmd`.
+ // * The returned `Pin` ensures that the object cannot be moved, which
+ // is required because the kernel may hold pointers to this memory
+ // location and moving it would invalidate those pointers.
+ unsafe { Pin::new_unchecked(&mut *ptr.cast()) }
+ }
+
+ /// Returns the file that referenced by uring cmd self.
+ #[inline]
+ pub fn file(&self) -> &File {
+ // SAFETY: `self.inner` is guaranteed by the type invariant to point
+ // to a live `io_uring_cmd`, so dereferencing is safe.
+ let file = unsafe { (*self.inner.get()).file };
+
+ // SAFETY:
+ // * The `file` points valid file.
+ // * refcount is positive after submission queue entry issued.
+ // * There is no active fdget_pos region on the file on this thread.
+ unsafe { File::from_raw_file(file) }
+ }
+
+ /// Returns an reference to the [`IoUringSqe`] associated with this command.
+ #[inline]
+ pub fn sqe(&self) -> &IoUringSqe {
+ // SAFETY: `self.inner` is guaranteed by the type invariant to point
+ // to a live `io_uring_cmd`, so dereferencing is safe.
+ let sqe = unsafe { (*self.inner.get()).sqe };
+ // SAFETY: The call guarantees that the `sqe` points valid io_uring_sqe.
+ unsafe { IoUringSqe::from_raw(sqe) }
+ }
+
+ /// Completes an this [`IoUringCmd`] request that was previously queued.
+ ///
+ /// # Safety
+ ///
+ /// - This function must be called **only** for a command whose `uring_cmd`
+ /// handler previously returned **`-EIOCBQUEUED`** to io_uring.
+ ///
+ /// # Parameters
+ ///
+ /// - `ret`: Result to return to userspace.
+ /// - `res2`: Extra for big completion queue entry `IORING_SETUP_CQE32`.
+ /// - `issue_flags`: Flags associated with this request, typically the same
+ /// as those passed to the `uring_cmd` handler.
+ #[inline]
+ pub fn done(self: Pin<&mut IoUringCmd>, ret: Result<i32>, res2: u64, issue_flags: u32) {
+ let ret = from_result(|| ret) as isize;
+ // SAFETY: The call guarantees that `self.inner` is not dangling and stays valid
+ unsafe {
+ bindings::io_uring_cmd_done(self.inner.get(), ret, res2, issue_flags);
+ }
+ }
+}
+
+/// A Rust abstraction for the Linux kernel's `io_uring_sqe` structure.
+///
+/// This structure is a safe, opaque wrapper around the raw C [`io_uring_sqe`](srctree/include/uapi/linux/io_uring.h)
+/// binding from the Linux kernel. It represents a Submission Queue Entry
+/// used in io_uring operations within the kernel.
+///
+/// # Type Safety
+///
+/// The `#[repr(transparent)]` attribute ensures that this wrapper has
+/// the same memory layout as the underlying `io_uring_sqe` structure,
+/// allowing it to be safely transmuted between the two representations.
+///
+/// # Fields
+///
+/// * `inner` - An opaque wrapper containing the actual `io_uring_sqe` data.
+/// The `Opaque` type prevents direct access to the internal
+/// structure fields, ensuring memory safety and encapsulation.
+///
+/// # Usage
+///
+/// This type represents a submission queue entry that describes an I/O
+/// operation to be executed by the io_uring subsystem. It contains
+/// information such as the operation type, file descriptor, buffer
+/// pointers, and other operation-specific data.
+///
+/// Users can obtain this type from [`IoUringCmd::sqe()`] method, which
+/// extracts the submission queue entry associated with a command.
+///
+/// This type should not be constructed or manipulated directly by
+/// kernel module developers.
+///
+/// # INVARIANT
+/// - `self.inner` always points to a valid, live `bindings::io_uring_sqe`.
+#[repr(transparent)]
+pub struct IoUringSqe {
+ inner: Opaque<bindings::io_uring_sqe>,
+}
+
+impl IoUringSqe {
+ /// Reads and interprets the `cmd` field of an `bindings::io_uring_sqe` as a value of type `T`.
+ ///
+ /// # Safety & Invariants
+ /// - Construction of `T` is delegated to `FromBytes`, which guarantees that `T` has no
+ /// invalid bit patterns and can be safely reconstructed from raw bytes.
+ /// - **Limitation:** This implementation does not support `IORING_SETUP_SQE128` (larger SQE entries).
+ /// Only the standard `io_uring_sqe` layout is handled here.
+ ///
+ /// # Errors
+ /// * Returns `EINVAL` if the `self` does not hold a `opcode::URING_CMD`.
+ /// * Returns `EFAULT` if the command buffer is smaller than the requested type `T`.
+ ///
+ /// # Returns
+ /// * On success, returns a `T` deserialized from the `cmd`.
+ /// * On failure, returns an appropriate error as described above.
+ pub fn cmd_data<T: FromBytes>(&self) -> Result<T> {
+ // SAFETY: `self.inner` guaranteed by the type invariant to point
+ // to a live `io_uring_sqe`, so dereferencing is safe.
+ let sqe = unsafe { &*self.inner.get() };
+
+ if u32::from(sqe.opcode) != opcode::URING_CMD {
+ return Err(EINVAL);
+ }
+
+ // SAFETY: Accessing the `sqe.cmd` union field is safe because we've
+ // verified that `sqe.opcode == IORING_OP_URING_CMD`, which guarantees
+ // that this union variant is initialized and valid.
+ let cmd = unsafe { sqe.__bindgen_anon_6.cmd.as_ref() };
+ let cmd_len = size_of_val(&sqe.__bindgen_anon_6.bindgen_union_field);
+
+ if cmd_len < size_of::<T>() {
+ return Err(EFAULT);
+ }
+
+ let cmd_ptr = cmd.as_ptr() as *mut T;
+
+ // SAFETY: `cmd_ptr` is valid from `self.inner` which is guaranteed by
+ // type variant. And also it points to initialized `T` from userspace.
+ let ret = unsafe { core::ptr::read_unaligned(cmd_ptr) };
+
+ Ok(ret)
+ }
+
+ /// Constructs a new `IoUringSqe` from a raw `io_uring_sqe`.
+ ///
+ /// # Safety
+ ///
+ /// The caller must guarantee that:
+ /// - `ptr` is non-null, properly aligned, and points to a valid initialized
+ /// `bindings::io_uring_sqe`.
+ /// - The pointed-to memory remains valid (not freed or repurposed) for the
+ /// entire lifetime `'a` of the returned reference.
+ /// - **Aliasing rules (for `&T`):** while the returned `&'a IoUringSqe` is
+ /// alive, there must be **no mutable access** to the same object through any
+ /// path (no `&mut`, no raw-pointer writes, no FFI/IRQ/other-CPU writers).
+ /// Multiple `&` is fine **only if all of them are read-only** for the entire
+ /// overlapping lifetime.
+ /// - This relies on `IoUringSqe` being `repr(transparent)` over
+ /// `bindings::io_uring_sqe`, so the cast preserves layout.
+ #[inline]
+ pub unsafe fn from_raw<'a>(ptr: *const bindings::io_uring_sqe) -> &'a IoUringSqe {
+ // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the
+ // duration of 'a. The cast is okay because `IoUringSqe` is `repr(transparent)` and has the
+ // same memory layout as `bindings::io_uring_sqe`.
+ unsafe { &*ptr.cast() }
+ }
+}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index ed53169e795c..d38cf7137401 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -91,6 +91,7 @@
pub mod fs;
pub mod init;
pub mod io;
+pub mod io_uring;
pub mod ioctl;
pub mod jump_label;
#[cfg(CONFIG_KUNIT)]
--
2.43.0
On Fri, Aug 22, 2025 at 5:56 AM Sidong Yang <sidong.yang@furiosa.ai> wrote: > > Implment the io-uring abstractions needed for miscdevicecs and other typo: "Implement" > char devices that have io-uring command interface. > > * `io_uring::IoUringCmd` : Rust abstraction for `io_uring_cmd` which > will be used as arg for `MiscDevice::uring_cmd()`. And driver can get > `cmd_op` sent from userspace. Also it has `flags` which includes option > that is reissued. > > * `io_uring::IoUringSqe` : Rust abstraction for `io_uring_sqe` which > could be get from `IoUringCmd::sqe()` and driver could get `cmd_data` > from userspace. Also `IoUringSqe` has more data like opcode could be used in > driver. > > Signed-off-by: Sidong Yang <sidong.yang@furiosa.ai> > --- > rust/kernel/io_uring.rs | 306 ++++++++++++++++++++++++++++++++++++++++ > rust/kernel/lib.rs | 1 + > 2 files changed, 307 insertions(+) > create mode 100644 rust/kernel/io_uring.rs > > diff --git a/rust/kernel/io_uring.rs b/rust/kernel/io_uring.rs > new file mode 100644 > index 000000000000..61e88bdf4e42 > --- /dev/null > +++ b/rust/kernel/io_uring.rs > @@ -0,0 +1,306 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// SPDX-FileCopyrightText: (C) 2025 Furiosa AI > + > +//! Abstractions for io-uring. > +//! > +//! This module provides types for implements io-uring interface for char device. > +//! > +//! > +//! C headers: [`include/linux/io_uring/cmd.h`](srctree/include/linux/io_uring/cmd.h) and > +//! [`include/linux/io_uring/io_uring.h`](srctree/include/linux/io_uring/io_uring.h) > + > +use core::{mem::MaybeUninit, pin::Pin}; > + > +use crate::error::from_result; > +use crate::transmute::{AsBytes, FromBytes}; > +use crate::{fs::File, types::Opaque}; > + > +use crate::prelude::*; > + > +/// io-uring opcode > +pub mod opcode { > + /// opcode for uring cmd > + pub const URING_CMD: u32 = bindings::io_uring_op_IORING_OP_URING_CMD; > +} > + > +/// A Rust abstraction for the Linux kernel's `io_uring_cmd` structure. > +/// > +/// This structure is a safe, opaque wrapper around the raw C `io_uring_cmd` > +/// binding from the Linux kernel. It represents a command structure used > +/// in io_uring operations within the kernel. > +/// This type is used internally by the io_uring subsystem to manage > +/// asynchronous I/O commands. > +/// > +/// This type should not be constructed or manipulated directly by > +/// kernel module developers. > +/// > +/// # INVARIANT > +/// - `self.inner` always points to a valid, live `bindings::io_uring_cmd`. > +#[repr(transparent)] > +pub struct IoUringCmd { > + /// An opaque wrapper containing the actual `io_uring_cmd` data. > + inner: Opaque<bindings::io_uring_cmd>, > +} > + > +impl IoUringCmd { > + /// Returns the cmd_op with associated with the `io_uring_cmd`. > + #[inline] > + pub fn cmd_op(&self) -> u32 { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + unsafe { (*self.inner.get()).cmd_op } > + } > + > + /// Returns the flags with associated with the `io_uring_cmd`. > + #[inline] > + pub fn flags(&self) -> u32 { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + unsafe { (*self.inner.get()).flags } > + } > + > + /// Reads protocol data unit as `T` that impl `FromBytes` from uring cmd > + /// > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > + #[inline] > + pub fn read_pdu<T: FromBytes>(&self) -> Result<T> { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + let inner = unsafe { &mut *self.inner.get() }; Why does this reference need to be mutable? > + > + let len = size_of::<T>(); > + if len > inner.pdu.len() { > + return Err(EFAULT); > + } > + > + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); Why is the intermediate MaybeUninit necessary? Would core::ptr::read_unaligned() not work? > + let ptr = &raw mut inner.pdu as *const c_void; > + > + // SAFETY: > + // * The `ptr` is valid pointer from `self.inner` that is guaranteed by type invariant. > + // * The `out` is valid pointer that points `T` which impls `FromBytes` and checked > + // size of `T` is smaller than pdu size. > + unsafe { > + core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::<c_void>(), len); > + } > + > + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements > + // `FromBytes`, any bit-pattern is a valid value for this type. > + Ok(unsafe { out.assume_init() }) > + } > + > + /// Writes the provided `value` to `pdu` in uring_cmd `self` > + /// > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > + #[inline] > + pub fn write_pdu<T: AsBytes>(&mut self, value: &T) -> Result<()> { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + let inner = unsafe { &mut *self.inner.get() }; > + > + let len = size_of::<T>(); > + if len > inner.pdu.len() { > + return Err(EFAULT); > + } > + > + let src = (value as *const T).cast::<c_void>(); > + let dst = &raw mut inner.pdu as *mut c_void; > + > + // SAFETY: > + // * The `src` is points valid memory that is guaranteed by `T` impls `AsBytes` > + // * The `dst` is valid. It's from `self.inner` that is guaranteed by type invariant. > + // * It's safe to copy because size of `T` is no more than len of pdu. > + unsafe { > + core::ptr::copy_nonoverlapping(src, dst, len); > + } > + > + Ok(()) > + } > + > + /// Constructs a new [`IoUringCmd`] from a raw `io_uring_cmd` > + /// > + /// # Safety > + /// > + /// The caller must guarantee that: > + /// - `ptr` is non-null, properly aligned, and points to a valid > + /// `bindings::io_uring_cmd`. > + /// - The pointed-to memory remains initialized and valid for the entire > + /// lifetime `'a` of the returned reference. > + /// - While the returned `Pin<&'a mut IoUringCmd>` is alive, the underlying > + /// object is **not moved** (pinning requirement). > + /// - **Aliasing rules:** the returned `&mut` has **exclusive** access to the same > + /// object for its entire lifetime: > + /// - No other `&mut` **or** `&` references to the same `io_uring_cmd` may be > + /// alive at the same time. > + /// - There must be no concurrent reads/writes through raw pointers, FFI, or > + /// other kernel paths to the same object during this lifetime. > + /// - If the object can be touched from other contexts (e.g. IRQ/another CPU), > + /// the caller must provide synchronization to uphold this exclusivity. > + /// - This function relies on `IoUringCmd` being `repr(transparent)` over > + /// `bindings::io_uring_cmd` so the cast preserves layout. > + #[inline] > + pub unsafe fn from_raw<'a>(ptr: *mut bindings::io_uring_cmd) -> Pin<&'a mut IoUringCmd> { > + // SAFETY: > + // * The caller guarantees that the pointer is not dangling and stays > + // valid for the duration of 'a. > + // * The cast is okay because `IoUringCmd` is `repr(transparent)` and > + // has the same memory layout as `bindings::io_uring_cmd`. > + // * The returned `Pin` ensures that the object cannot be moved, which > + // is required because the kernel may hold pointers to this memory > + // location and moving it would invalidate those pointers. > + unsafe { Pin::new_unchecked(&mut *ptr.cast()) } > + } > + > + /// Returns the file that referenced by uring cmd self. > + #[inline] > + pub fn file(&self) -> &File { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + let file = unsafe { (*self.inner.get()).file }; > + > + // SAFETY: > + // * The `file` points valid file. > + // * refcount is positive after submission queue entry issued. > + // * There is no active fdget_pos region on the file on this thread. > + unsafe { File::from_raw_file(file) } > + } > + > + /// Returns an reference to the [`IoUringSqe`] associated with this command. > + #[inline] > + pub fn sqe(&self) -> &IoUringSqe { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + let sqe = unsafe { (*self.inner.get()).sqe }; > + // SAFETY: The call guarantees that the `sqe` points valid io_uring_sqe. > + unsafe { IoUringSqe::from_raw(sqe) } > + } > + > + /// Completes an this [`IoUringCmd`] request that was previously queued. > + /// > + /// # Safety > + /// > + /// - This function must be called **only** for a command whose `uring_cmd` > + /// handler previously returned **`-EIOCBQUEUED`** to io_uring. > + /// > + /// # Parameters > + /// > + /// - `ret`: Result to return to userspace. > + /// - `res2`: Extra for big completion queue entry `IORING_SETUP_CQE32`. > + /// - `issue_flags`: Flags associated with this request, typically the same > + /// as those passed to the `uring_cmd` handler. > + #[inline] > + pub fn done(self: Pin<&mut IoUringCmd>, ret: Result<i32>, res2: u64, issue_flags: u32) { Since this takes the IoUringCmd by reference, it allows a single io_uring_cmd to be completed multiple times, which would not be safe. This method probably needs to take ownership of the IoUringCmd. Even better would be to enforce at compile time that the number of times IoUringCmd::done() is called matches the return value from uring_cmd(): io_uring_cmd_done() may only be called if uring_cmd() returns -EIOCBQUEUED; -EAGAIN will result in another call to uring_cmd() and all other return values synchronously complete the io_uring_cmd. It's also not safe for the caller to pass an arbitrary value for issue_flags here; it needs to exactly match what was passed into uring_cmd(). Maybe we could introduce a type that couples the IoUringCmd and issue_flags passed to uring_cmd()? > + let ret = from_result(|| ret) as isize; > + // SAFETY: The call guarantees that `self.inner` is not dangling and stays valid > + unsafe { > + bindings::io_uring_cmd_done(self.inner.get(), ret, res2, issue_flags); > + } > + } > +} > + > +/// A Rust abstraction for the Linux kernel's `io_uring_sqe` structure. > +/// > +/// This structure is a safe, opaque wrapper around the raw C [`io_uring_sqe`](srctree/include/uapi/linux/io_uring.h) > +/// binding from the Linux kernel. It represents a Submission Queue Entry > +/// used in io_uring operations within the kernel. > +/// > +/// # Type Safety > +/// > +/// The `#[repr(transparent)]` attribute ensures that this wrapper has > +/// the same memory layout as the underlying `io_uring_sqe` structure, > +/// allowing it to be safely transmuted between the two representations. > +/// > +/// # Fields > +/// > +/// * `inner` - An opaque wrapper containing the actual `io_uring_sqe` data. > +/// The `Opaque` type prevents direct access to the internal > +/// structure fields, ensuring memory safety and encapsulation. > +/// > +/// # Usage > +/// > +/// This type represents a submission queue entry that describes an I/O > +/// operation to be executed by the io_uring subsystem. It contains > +/// information such as the operation type, file descriptor, buffer > +/// pointers, and other operation-specific data. > +/// > +/// Users can obtain this type from [`IoUringCmd::sqe()`] method, which > +/// extracts the submission queue entry associated with a command. > +/// > +/// This type should not be constructed or manipulated directly by > +/// kernel module developers. > +/// > +/// # INVARIANT > +/// - `self.inner` always points to a valid, live `bindings::io_uring_sqe`. > +#[repr(transparent)] > +pub struct IoUringSqe { > + inner: Opaque<bindings::io_uring_sqe>, > +} > + > +impl IoUringSqe { > + /// Reads and interprets the `cmd` field of an `bindings::io_uring_sqe` as a value of type `T`. > + /// > + /// # Safety & Invariants > + /// - Construction of `T` is delegated to `FromBytes`, which guarantees that `T` has no > + /// invalid bit patterns and can be safely reconstructed from raw bytes. > + /// - **Limitation:** This implementation does not support `IORING_SETUP_SQE128` (larger SQE entries). > + /// Only the standard `io_uring_sqe` layout is handled here. > + /// > + /// # Errors > + /// * Returns `EINVAL` if the `self` does not hold a `opcode::URING_CMD`. > + /// * Returns `EFAULT` if the command buffer is smaller than the requested type `T`. > + /// > + /// # Returns > + /// * On success, returns a `T` deserialized from the `cmd`. > + /// * On failure, returns an appropriate error as described above. > + pub fn cmd_data<T: FromBytes>(&self) -> Result<T> { > + // SAFETY: `self.inner` guaranteed by the type invariant to point > + // to a live `io_uring_sqe`, so dereferencing is safe. > + let sqe = unsafe { &*self.inner.get() }; > + > + if u32::from(sqe.opcode) != opcode::URING_CMD { io_uring opcodes are u8 values. Can the URING_CMD constant be made a u8 instead of converting sqe.opcode here? The read of the opcode should also be using read_volatile(), as it may lie in the userspace-mapped SQE region, which could be concurrently written by another userspace thread. That would probably be buggy behavior on the userspace side, but it can cause undefined behavior on the kernel side without a volatile read, as the compiler could choose to re-read the value multiple times assuming it will get the same value each time. > + return Err(EINVAL); > + } > + > + // SAFETY: Accessing the `sqe.cmd` union field is safe because we've > + // verified that `sqe.opcode == IORING_OP_URING_CMD`, which guarantees > + // that this union variant is initialized and valid. The opcode check isn't necessary. It doesn't provide any assurances that this variant of the union is actually initialized, since a buggy userspace process could set the opcode without initializing the cmd field. It's always valid to access this memory since it's part of the SQE region created at io_uring setup time. So I would drop the opcode check. > + let cmd = unsafe { sqe.__bindgen_anon_6.cmd.as_ref() }; > + let cmd_len = size_of_val(&sqe.__bindgen_anon_6.bindgen_union_field); > + > + if cmd_len < size_of::<T>() { > + return Err(EFAULT); > + } > + > + let cmd_ptr = cmd.as_ptr() as *mut T; > + > + // SAFETY: `cmd_ptr` is valid from `self.inner` which is guaranteed by > + // type variant. And also it points to initialized `T` from userspace. > + let ret = unsafe { core::ptr::read_unaligned(cmd_ptr) }; Similarly, needs to be volatile. The C uring_cmd() implementations use READ_ONCE() to read the cmd field. Best, Caleb > + > + Ok(ret) > + } > + > + /// Constructs a new `IoUringSqe` from a raw `io_uring_sqe`. > + /// > + /// # Safety > + /// > + /// The caller must guarantee that: > + /// - `ptr` is non-null, properly aligned, and points to a valid initialized > + /// `bindings::io_uring_sqe`. > + /// - The pointed-to memory remains valid (not freed or repurposed) for the > + /// entire lifetime `'a` of the returned reference. > + /// - **Aliasing rules (for `&T`):** while the returned `&'a IoUringSqe` is > + /// alive, there must be **no mutable access** to the same object through any > + /// path (no `&mut`, no raw-pointer writes, no FFI/IRQ/other-CPU writers). > + /// Multiple `&` is fine **only if all of them are read-only** for the entire > + /// overlapping lifetime. > + /// - This relies on `IoUringSqe` being `repr(transparent)` over > + /// `bindings::io_uring_sqe`, so the cast preserves layout. > + #[inline] > + pub unsafe fn from_raw<'a>(ptr: *const bindings::io_uring_sqe) -> &'a IoUringSqe { > + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the > + // duration of 'a. The cast is okay because `IoUringSqe` is `repr(transparent)` and has the > + // same memory layout as `bindings::io_uring_sqe`. > + unsafe { &*ptr.cast() } > + } > +} > diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs > index ed53169e795c..d38cf7137401 100644 > --- a/rust/kernel/lib.rs > +++ b/rust/kernel/lib.rs > @@ -91,6 +91,7 @@ > pub mod fs; > pub mod init; > pub mod io; > +pub mod io_uring; > pub mod ioctl; > pub mod jump_label; > #[cfg(CONFIG_KUNIT)] > -- > 2.43.0 >
On Mon, Sep 01, 2025 at 06:11:01PM -0700, Caleb Sander Mateos wrote: > On Fri, Aug 22, 2025 at 5:56 AM Sidong Yang <sidong.yang@furiosa.ai> wrote: > > > > Implment the io-uring abstractions needed for miscdevicecs and other > > typo: "Implement" Thanks. > > > char devices that have io-uring command interface. > > > > * `io_uring::IoUringCmd` : Rust abstraction for `io_uring_cmd` which > > will be used as arg for `MiscDevice::uring_cmd()`. And driver can get > > `cmd_op` sent from userspace. Also it has `flags` which includes option > > that is reissued. > > > > * `io_uring::IoUringSqe` : Rust abstraction for `io_uring_sqe` which > > could be get from `IoUringCmd::sqe()` and driver could get `cmd_data` > > from userspace. Also `IoUringSqe` has more data like opcode could be used in > > driver. > > > > Signed-off-by: Sidong Yang <sidong.yang@furiosa.ai> > > --- > > rust/kernel/io_uring.rs | 306 ++++++++++++++++++++++++++++++++++++++++ > > rust/kernel/lib.rs | 1 + > > 2 files changed, 307 insertions(+) > > create mode 100644 rust/kernel/io_uring.rs > > > > diff --git a/rust/kernel/io_uring.rs b/rust/kernel/io_uring.rs > > new file mode 100644 > > index 000000000000..61e88bdf4e42 > > --- /dev/null > > +++ b/rust/kernel/io_uring.rs > > @@ -0,0 +1,306 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +// SPDX-FileCopyrightText: (C) 2025 Furiosa AI > > + > > +//! Abstractions for io-uring. > > +//! > > +//! This module provides types for implements io-uring interface for char device. > > +//! > > +//! > > +//! C headers: [`include/linux/io_uring/cmd.h`](srctree/include/linux/io_uring/cmd.h) and > > +//! [`include/linux/io_uring/io_uring.h`](srctree/include/linux/io_uring/io_uring.h) > > + > > +use core::{mem::MaybeUninit, pin::Pin}; > > + > > +use crate::error::from_result; > > +use crate::transmute::{AsBytes, FromBytes}; > > +use crate::{fs::File, types::Opaque}; > > + > > +use crate::prelude::*; > > + > > +/// io-uring opcode > > +pub mod opcode { > > + /// opcode for uring cmd > > + pub const URING_CMD: u32 = bindings::io_uring_op_IORING_OP_URING_CMD; > > +} > > + > > +/// A Rust abstraction for the Linux kernel's `io_uring_cmd` structure. > > +/// > > +/// This structure is a safe, opaque wrapper around the raw C `io_uring_cmd` > > +/// binding from the Linux kernel. It represents a command structure used > > +/// in io_uring operations within the kernel. > > +/// This type is used internally by the io_uring subsystem to manage > > +/// asynchronous I/O commands. > > +/// > > +/// This type should not be constructed or manipulated directly by > > +/// kernel module developers. > > +/// > > +/// # INVARIANT > > +/// - `self.inner` always points to a valid, live `bindings::io_uring_cmd`. > > +#[repr(transparent)] > > +pub struct IoUringCmd { > > + /// An opaque wrapper containing the actual `io_uring_cmd` data. > > + inner: Opaque<bindings::io_uring_cmd>, > > +} > > + w> > +impl IoUringCmd { > > + /// Returns the cmd_op with associated with the `io_uring_cmd`. > > + #[inline] > > + pub fn cmd_op(&self) -> u32 { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + unsafe { (*self.inner.get()).cmd_op } > > + } > > + > > + /// Returns the flags with associated with the `io_uring_cmd`. > > + #[inline] > > + pub fn flags(&self) -> u32 { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + unsafe { (*self.inner.get()).flags } > > + } > > + > > + /// Reads protocol data unit as `T` that impl `FromBytes` from uring cmd > > + /// > > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > > + #[inline] > > + pub fn read_pdu<T: FromBytes>(&self) -> Result<T> { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + let inner = unsafe { &mut *self.inner.get() }; > > Why does this reference need to be mutable? It don't need to be mutable. This could be borrow without mutable. > > > + > > + let len = size_of::<T>(); > > + if len > inner.pdu.len() { > > + return Err(EFAULT); > > + } > > + > > + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); > > Why is the intermediate MaybeUninit necessary? Would > core::ptr::read_unaligned() not work? It's okay to use `read_unaligned`. It would be simpler than now. Thanks. > > > + let ptr = &raw mut inner.pdu as *const c_void; > > + > > + // SAFETY: > > + // * The `ptr` is valid pointer from `self.inner` that is guaranteed by type invariant. > > + // * The `out` is valid pointer that points `T` which impls `FromBytes` and checked > > + // size of `T` is smaller than pdu size. > > + unsafe { > > + core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::<c_void>(), len); > > + } > > + > > + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements > > + // `FromBytes`, any bit-pattern is a valid value for this type. > > + Ok(unsafe { out.assume_init() }) > > + } > > + > > + /// Writes the provided `value` to `pdu` in uring_cmd `self` > > + /// > > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > > + #[inline] > > + pub fn write_pdu<T: AsBytes>(&mut self, value: &T) -> Result<()> { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + let inner = unsafe { &mut *self.inner.get() }; > > + > > + let len = size_of::<T>(); > > + if len > inner.pdu.len() { > > + return Err(EFAULT); > > + } > > + > > + let src = (value as *const T).cast::<c_void>(); > > + let dst = &raw mut inner.pdu as *mut c_void; > > + > > + // SAFETY: > > + // * The `src` is points valid memory that is guaranteed by `T` impls `AsBytes` > > + // * The `dst` is valid. It's from `self.inner` that is guaranteed by type invariant. > > + // * It's safe to copy because size of `T` is no more than len of pdu. > > + unsafe { > > + core::ptr::copy_nonoverlapping(src, dst, len); > > + } > > + > > + Ok(()) > > + } > > + > > + /// Constructs a new [`IoUringCmd`] from a raw `io_uring_cmd` > > + /// > > + /// # Safety > > + /// > > + /// The caller must guarantee that: > > + /// - `ptr` is non-null, properly aligned, and points to a valid > > + /// `bindings::io_uring_cmd`. > > + /// - The pointed-to memory remains initialized and valid for the entire > > + /// lifetime `'a` of the returned reference. > > + /// - While the returned `Pin<&'a mut IoUringCmd>` is alive, the underlying > > + /// object is **not moved** (pinning requirement). > > + /// - **Aliasing rules:** the returned `&mut` has **exclusive** access to the same > > + /// object for its entire lifetime: > > + /// - No other `&mut` **or** `&` references to the same `io_uring_cmd` may be > > + /// alive at the same time. > > + /// - There must be no concurrent reads/writes through raw pointers, FFI, or > > + /// other kernel paths to the same object during this lifetime. > > + /// - If the object can be touched from other contexts (e.g. IRQ/another CPU), > > + /// the caller must provide synchronization to uphold this exclusivity. > > + /// - This function relies on `IoUringCmd` being `repr(transparent)` over > > + /// `bindings::io_uring_cmd` so the cast preserves layout. > > + #[inline] > > + pub unsafe fn from_raw<'a>(ptr: *mut bindings::io_uring_cmd) -> Pin<&'a mut IoUringCmd> { > > + // SAFETY: > > + // * The caller guarantees that the pointer is not dangling and stays > > + // valid for the duration of 'a. > > + // * The cast is okay because `IoUringCmd` is `repr(transparent)` and > > + // has the same memory layout as `bindings::io_uring_cmd`. > > + // * The returned `Pin` ensures that the object cannot be moved, which > > + // is required because the kernel may hold pointers to this memory > > + // location and moving it would invalidate those pointers. > > + unsafe { Pin::new_unchecked(&mut *ptr.cast()) } > > + } > > + > > + /// Returns the file that referenced by uring cmd self. > > + #[inline] > > + pub fn file(&self) -> &File { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + let file = unsafe { (*self.inner.get()).file }; > > + > > + // SAFETY: ?> > + // * The `file` points valid file. > > + // * refcount is positive after submission queue entry issued. > > + // * There is no active fdget_pos region on the file on this thread. > > + unsafe { File::from_raw_file(file) } > > + } > > + > > + /// Returns an reference to the [`IoUringSqe`] associated with this command. > > + #[inline] > > + pub fn sqe(&self) -> &IoUringSqe { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + let sqe = unsafe { (*self.inner.get()).sqe }; > > + // SAFETY: The call guarantees that the `sqe` points valid io_uring_sqe. > > + unsafe { IoUringSqe::from_raw(sqe) } > > + } > > + > > + /// Completes an this [`IoUringCmd`] request that was previously queued. > > + /// > > + /// # Safety > > + /// > > + /// - This function must be called **only** for a command whose `uring_cmd` > > + /// handler previously returned **`-EIOCBQUEUED`** to io_uring. > > + /// > > + /// # Parameters > > + /// > > + /// - `ret`: Result to return to userspace. > > + /// - `res2`: Extra for big completion queue entry `IORING_SETUP_CQE32`. > > + /// - `issue_flags`: Flags associated with this request, typically the same > > + /// as those passed to the `uring_cmd` handler. > > + #[inline] > > + pub fn done(self: Pin<&mut IoUringCmd>, ret: Result<i32>, res2: u64, issue_flags: u32) { > > Since this takes the IoUringCmd by reference, it allows a single > io_uring_cmd to be completed multiple times, which would not be safe. > This method probably needs to take ownership of the IoUringCmd. Even > better would be to enforce at compile time that the number of times > IoUringCmd::done() is called matches the return value from > uring_cmd(): io_uring_cmd_done() may only be called if uring_cmd() > returns -EIOCBQUEUED; -EAGAIN will result in another call to > uring_cmd() and all other return values synchronously complete the > io_uring_cmd. > > It's also not safe for the caller to pass an arbitrary value for > issue_flags here; it needs to exactly match what was passed into > uring_cmd(). Maybe we could introduce a type that couples the > IoUringCmd and issue_flags passed to uring_cmd()? Agreed. We could introduce a new type like below pub struct IoUringCmd<'a> { cmd: Pin<&'a mut IoUringCmdInner>, issue_flags: IssueFlags, } Is `io_uring_cmd_done` should be called in iopoll callback? If so, it's better to make new type for iopoll and move this function to the type. > > > + let ret = from_result(|| ret) as isize; > > + // SAFETY: The call guarantees that `self.inner` is not dangling and stays valid > > + unsafe { > > + bindings::io_uring_cmd_done(self.inner.get(), ret, res2, issue_flags); > > + } > > + } > > +} > > + > > +/// A Rust abstraction for the Linux kernel's `io_uring_sqe` structure. > > +/// > > +/// This structure is a safe, opaque wrapper around the raw C [`io_uring_sqe`](srctree/include/uapi/linux/io_uring.h) > > +/// binding from the Linux kernel. It represents a Submission Queue Entry > > +/// used in io_uring operations within the kernel. > > +/// > > +/// # Type Safety > > +/// > > +/// The `#[repr(transparent)]` attribute ensures that this wrapper has > > +/// the same memory layout as the underlying `io_uring_sqe` structure, > > +/// allowing it to be safely transmuted between the two representations. > > +/// > > +/// # Fields > > +/// > > +/// * `inner` - An opaque wrapper containing the actual `io_uring_sqe` data. > > +/// The `Opaque` type prevents direct access to the internal > > +/// structure fields, ensuring memory safety and encapsulation. > > +/// > > +/// # Usage > > +/// > > +/// This type represents a submission queue entry that describes an I/O > > +/// operation to be executed by the io_uring subsystem. It contains > > +/// information such as the operation type, file descriptor, buffer > > +/// pointers, and other operation-specific data. > > +/// > > +/// Users can obtain this type from [`IoUringCmd::sqe()`] method, which > > +/// extracts the submission queue entry associated with a command. > > +/// > > +/// This type should not be constructed or manipulated directly by > > +/// kernel module developers. > > +/// > > +/// # INVARIANT > > +/// - `self.inner` always points to a valid, live `bindings::io_uring_sqe`. > > +#[repr(transparent)] > > +pub struct IoUringSqe { > > + inner: Opaque<bindings::io_uring_sqe>, > > +} > > + > > +impl IoUringSqe { > > + /// Reads and interprets the `cmd` field of an `bindings::io_uring_sqe` as a value of type `T`. > > + /// > > + /// # Safety & Invariants > > + /// - Construction of `T` is delegated to `FromBytes`, which guarantees that `T` has no > > + /// invalid bit patterns and can be safely reconstructed from raw bytes. > > + /// - **Limitation:** This implementation does not support `IORING_SETUP_SQE128` (larger SQE entries). > > + /// Only the standard `io_uring_sqe` layout is handled here. > > + /// > > + /// # Errors > > + /// * Returns `EINVAL` if the `self` does not hold a `opcode::URING_CMD`. > > + /// * Returns `EFAULT` if the command buffer is smaller than the requested type `T`. > > + /// > > + /// # Returns > > + /// * On success, returns a `T` deserialized from the `cmd`. > > + /// * On failure, returns an appropriate error as described above. > > + pub fn cmd_data<T: FromBytes>(&self) -> Result<T> { > > + // SAFETY: `self.inner` guaranteed by the type invariant to point > > + // to a live `io_uring_sqe`, so dereferencing is safe. > > + let sqe = unsafe { &*self.inner.get() }; > > + > > + if u32::from(sqe.opcode) != opcode::URING_CMD { > > io_uring opcodes are u8 values. Can the URING_CMD constant be made a > u8 instead of converting sqe.opcode here? > > The read of the opcode should also be using read_volatile(), as it may > lie in the userspace-mapped SQE region, which could be concurrently > written by another userspace thread. That would probably be buggy > behavior on the userspace side, but it can cause undefined behavior on > the kernel side without a volatile read, as the compiler could choose > to re-read the value multiple times assuming it will get the same > value each time. Thanks, opcode should be read with read_volatile(). And I would introduce new type for opcode. > > > + return Err(EINVAL); > > + } > > + > > + // SAFETY: Accessing the `sqe.cmd` union field is safe because we've > > + // verified that `sqe.opcode == IORING_OP_URING_CMD`, which guarantees > > + // that this union variant is initialized and valid. > > The opcode check isn't necessary. It doesn't provide any assurances > that this variant of the union is actually initialized, since a buggy > userspace process could set the opcode without initializing the cmd > field. It's always valid to access this memory since it's part of the > SQE region created at io_uring setup time. So I would drop the opcode > check. > > > + let cmd = unsafe { sqe.__bindgen_anon_6.cmd.as_ref() }; > > + let cmd_len = size_of_val(&sqe.__bindgen_anon_6.bindgen_union_field); > > + > > + if cmd_len < size_of::<T>() { > > + return Err(EFAULT); > > + } > > + > > + let cmd_ptr = cmd.as_ptr() as *mut T; > > + > > + // SAFETY: `cmd_ptr` is valid from `self.inner` which is guaranteed by > > + // type variant. And also it points to initialized `T` from userspace. > > + let ret = unsafe { core::ptr::read_unaligned(cmd_ptr) }; > > Similarly, needs to be volatile. The C uring_cmd() implementations use > READ_ONCE() to read the cmd field. Okay, This should use read_volatile too. Thanks, Sidong > > Best, > Caleb > > > + > > + Ok(ret) > > + } > > + > > + /// Constructs a new `IoUringSqe` from a raw `io_uring_sqe`. > > + /// > > + /// # Safety > > + /// > > + /// The caller must guarantee that: > > + /// - `ptr` is non-null, properly aligned, and points to a valid initialized > > + /// `bindings::io_uring_sqe`. > > + /// - The pointed-to memory remains valid (not freed or repurposed) for the > > + /// entire lifetime `'a` of the returned reference. > > + /// - **Aliasing rules (for `&T`):** while the returned `&'a IoUringSqe` is > > + /// alive, there must be **no mutable access** to the same object through any > > + /// path (no `&mut`, no raw-pointer writes, no FFI/IRQ/other-CPU writers). > > + /// Multiple `&` is fine **only if all of them are read-only** for the entire > > + /// overlapping lifetime. > > + /// - This relies on `IoUringSqe` being `repr(transparent)` over > > + /// `bindings::io_uring_sqe`, so the cast preserves layout. > > + #[inline] > > + pub unsafe fn from_raw<'a>(ptr: *const bindings::io_uring_sqe) -> &'a IoUringSqe { > > + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the > > + // duration of 'a. The cast is okay because `IoUringSqe` is `repr(transparent)` and has the > > + // same memory layout as `bindings::io_uring_sqe`. > > + unsafe { &*ptr.cast() } > > + } > > +} > > diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs > > index ed53169e795c..d38cf7137401 100644 > > --- a/rust/kernel/lib.rs > > +++ b/rust/kernel/lib.rs > > @@ -91,6 +91,7 @@ > > pub mod fs; > > pub mod init; > > pub mod io; > > +pub mod io_uring; > > pub mod ioctl; > > pub mod jump_label; > > #[cfg(CONFIG_KUNIT)] > > -- > > 2.43.0 > >
On Tue, Sep 2, 2025 at 4:11 AM Sidong Yang <sidong.yang@furiosa.ai> wrote: > > On Mon, Sep 01, 2025 at 06:11:01PM -0700, Caleb Sander Mateos wrote: > > On Fri, Aug 22, 2025 at 5:56 AM Sidong Yang <sidong.yang@furiosa.ai> wrote: > > > > > > Implment the io-uring abstractions needed for miscdevicecs and other > > > > typo: "Implement" > > Thanks. > > > > > char devices that have io-uring command interface. > > > > > > * `io_uring::IoUringCmd` : Rust abstraction for `io_uring_cmd` which > > > will be used as arg for `MiscDevice::uring_cmd()`. And driver can get > > > `cmd_op` sent from userspace. Also it has `flags` which includes option > > > that is reissued. > > > > > > * `io_uring::IoUringSqe` : Rust abstraction for `io_uring_sqe` which > > > could be get from `IoUringCmd::sqe()` and driver could get `cmd_data` > > > from userspace. Also `IoUringSqe` has more data like opcode could be used in > > > driver. > > > > > > Signed-off-by: Sidong Yang <sidong.yang@furiosa.ai> > > > --- > > > rust/kernel/io_uring.rs | 306 ++++++++++++++++++++++++++++++++++++++++ > > > rust/kernel/lib.rs | 1 + > > > 2 files changed, 307 insertions(+) > > > create mode 100644 rust/kernel/io_uring.rs > > > > > > diff --git a/rust/kernel/io_uring.rs b/rust/kernel/io_uring.rs > > > new file mode 100644 > > > index 000000000000..61e88bdf4e42 > > > --- /dev/null > > > +++ b/rust/kernel/io_uring.rs > > > @@ -0,0 +1,306 @@ > > > +// SPDX-License-Identifier: GPL-2.0 > > > +// SPDX-FileCopyrightText: (C) 2025 Furiosa AI > > > + > > > +//! Abstractions for io-uring. > > > +//! > > > +//! This module provides types for implements io-uring interface for char device. > > > +//! > > > +//! > > > +//! C headers: [`include/linux/io_uring/cmd.h`](srctree/include/linux/io_uring/cmd.h) and > > > +//! [`include/linux/io_uring/io_uring.h`](srctree/include/linux/io_uring/io_uring.h) > > > + > > > +use core::{mem::MaybeUninit, pin::Pin}; > > > + > > > +use crate::error::from_result; > > > +use crate::transmute::{AsBytes, FromBytes}; > > > +use crate::{fs::File, types::Opaque}; > > > + > > > +use crate::prelude::*; > > > + > > > +/// io-uring opcode > > > +pub mod opcode { > > > + /// opcode for uring cmd > > > + pub const URING_CMD: u32 = bindings::io_uring_op_IORING_OP_URING_CMD; > > > +} > > > + > > > +/// A Rust abstraction for the Linux kernel's `io_uring_cmd` structure. > > > +/// > > > +/// This structure is a safe, opaque wrapper around the raw C `io_uring_cmd` > > > +/// binding from the Linux kernel. It represents a command structure used > > > +/// in io_uring operations within the kernel. > > > +/// This type is used internally by the io_uring subsystem to manage > > > +/// asynchronous I/O commands. > > > +/// > > > +/// This type should not be constructed or manipulated directly by > > > +/// kernel module developers. > > > +/// > > > +/// # INVARIANT > > > +/// - `self.inner` always points to a valid, live `bindings::io_uring_cmd`. > > > +#[repr(transparent)] > > > +pub struct IoUringCmd { > > > + /// An opaque wrapper containing the actual `io_uring_cmd` data. > > > + inner: Opaque<bindings::io_uring_cmd>, > > > +} > > > + > w> > +impl IoUringCmd { > > > + /// Returns the cmd_op with associated with the `io_uring_cmd`. > > > + #[inline] > > > + pub fn cmd_op(&self) -> u32 { > > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > > + unsafe { (*self.inner.get()).cmd_op } > > > + } > > > + > > > + /// Returns the flags with associated with the `io_uring_cmd`. > > > + #[inline] > > > + pub fn flags(&self) -> u32 { > > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > > + unsafe { (*self.inner.get()).flags } > > > + } > > > + > > > + /// Reads protocol data unit as `T` that impl `FromBytes` from uring cmd > > > + /// > > > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > > > + #[inline] > > > + pub fn read_pdu<T: FromBytes>(&self) -> Result<T> { > > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > > + let inner = unsafe { &mut *self.inner.get() }; > > > > Why does this reference need to be mutable? > > It don't need to be mutable. This could be borrow without mutable. > > > > > + > > > + let len = size_of::<T>(); > > > + if len > inner.pdu.len() { > > > + return Err(EFAULT); > > > + } > > > + > > > + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); > > > > Why is the intermediate MaybeUninit necessary? Would > > core::ptr::read_unaligned() not work? > > It's okay to use `read_unaligned`. It would be simpler than now. Thanks. > > > > > > + let ptr = &raw mut inner.pdu as *const c_void; > > > + > > > + // SAFETY: > > > + // * The `ptr` is valid pointer from `self.inner` that is guaranteed by type invariant. > > > + // * The `out` is valid pointer that points `T` which impls `FromBytes` and checked > > > + // size of `T` is smaller than pdu size. > > > + unsafe { > > > + core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::<c_void>(), len); > > > + } > > > + > > > + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements > > > + // `FromBytes`, any bit-pattern is a valid value for this type. > > > + Ok(unsafe { out.assume_init() }) > > > + } > > > + > > > + /// Writes the provided `value` to `pdu` in uring_cmd `self` > > > + /// > > > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > > > + #[inline] > > > + pub fn write_pdu<T: AsBytes>(&mut self, value: &T) -> Result<()> { > > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > > + let inner = unsafe { &mut *self.inner.get() }; > > > + > > > + let len = size_of::<T>(); > > > + if len > inner.pdu.len() { > > > + return Err(EFAULT); > > > + } > > > + > > > + let src = (value as *const T).cast::<c_void>(); > > > + let dst = &raw mut inner.pdu as *mut c_void; > > > + > > > + // SAFETY: > > > + // * The `src` is points valid memory that is guaranteed by `T` impls `AsBytes` > > > + // * The `dst` is valid. It's from `self.inner` that is guaranteed by type invariant. > > > + // * It's safe to copy because size of `T` is no more than len of pdu. > > > + unsafe { > > > + core::ptr::copy_nonoverlapping(src, dst, len); > > > + } > > > + > > > + Ok(()) > > > + } > > > + > > > + /// Constructs a new [`IoUringCmd`] from a raw `io_uring_cmd` > > > + /// > > > + /// # Safety > > > + /// > > > + /// The caller must guarantee that: > > > + /// - `ptr` is non-null, properly aligned, and points to a valid > > > + /// `bindings::io_uring_cmd`. > > > + /// - The pointed-to memory remains initialized and valid for the entire > > > + /// lifetime `'a` of the returned reference. > > > + /// - While the returned `Pin<&'a mut IoUringCmd>` is alive, the underlying > > > + /// object is **not moved** (pinning requirement). > > > + /// - **Aliasing rules:** the returned `&mut` has **exclusive** access to the same > > > + /// object for its entire lifetime: > > > + /// - No other `&mut` **or** `&` references to the same `io_uring_cmd` may be > > > + /// alive at the same time. > > > + /// - There must be no concurrent reads/writes through raw pointers, FFI, or > > > + /// other kernel paths to the same object during this lifetime. > > > + /// - If the object can be touched from other contexts (e.g. IRQ/another CPU), > > > + /// the caller must provide synchronization to uphold this exclusivity. > > > + /// - This function relies on `IoUringCmd` being `repr(transparent)` over > > > + /// `bindings::io_uring_cmd` so the cast preserves layout. > > > + #[inline] > > > + pub unsafe fn from_raw<'a>(ptr: *mut bindings::io_uring_cmd) -> Pin<&'a mut IoUringCmd> { > > > + // SAFETY: > > > + // * The caller guarantees that the pointer is not dangling and stays > > > + // valid for the duration of 'a. > > > + // * The cast is okay because `IoUringCmd` is `repr(transparent)` and > > > + // has the same memory layout as `bindings::io_uring_cmd`. > > > + // * The returned `Pin` ensures that the object cannot be moved, which > > > + // is required because the kernel may hold pointers to this memory > > > + // location and moving it would invalidate those pointers. > > > + unsafe { Pin::new_unchecked(&mut *ptr.cast()) } > > > + } > > > + > > > + /// Returns the file that referenced by uring cmd self. > > > + #[inline] > > > + pub fn file(&self) -> &File { > > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > > + let file = unsafe { (*self.inner.get()).file }; > > > + > > > + // SAFETY: > ?> > + // * The `file` points valid file. > > > + // * refcount is positive after submission queue entry issued. > > > + // * There is no active fdget_pos region on the file on this thread. > > > + unsafe { File::from_raw_file(file) } > > > + } > > > + > > > + /// Returns an reference to the [`IoUringSqe`] associated with this command. > > > + #[inline] > > > + pub fn sqe(&self) -> &IoUringSqe { > > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > > + let sqe = unsafe { (*self.inner.get()).sqe }; > > > + // SAFETY: The call guarantees that the `sqe` points valid io_uring_sqe. > > > + unsafe { IoUringSqe::from_raw(sqe) } > > > + } > > > + > > > + /// Completes an this [`IoUringCmd`] request that was previously queued. > > > + /// > > > + /// # Safety > > > + /// > > > + /// - This function must be called **only** for a command whose `uring_cmd` > > > + /// handler previously returned **`-EIOCBQUEUED`** to io_uring. > > > + /// > > > + /// # Parameters > > > + /// > > > + /// - `ret`: Result to return to userspace. > > > + /// - `res2`: Extra for big completion queue entry `IORING_SETUP_CQE32`. > > > + /// - `issue_flags`: Flags associated with this request, typically the same > > > + /// as those passed to the `uring_cmd` handler. > > > + #[inline] > > > + pub fn done(self: Pin<&mut IoUringCmd>, ret: Result<i32>, res2: u64, issue_flags: u32) { > > > > Since this takes the IoUringCmd by reference, it allows a single > > io_uring_cmd to be completed multiple times, which would not be safe. > > This method probably needs to take ownership of the IoUringCmd. Even > > better would be to enforce at compile time that the number of times > > IoUringCmd::done() is called matches the return value from > > uring_cmd(): io_uring_cmd_done() may only be called if uring_cmd() > > returns -EIOCBQUEUED; -EAGAIN will result in another call to > > uring_cmd() and all other return values synchronously complete the > > io_uring_cmd. > > > > It's also not safe for the caller to pass an arbitrary value for > > issue_flags here; it needs to exactly match what was passed into > > uring_cmd(). Maybe we could introduce a type that couples the > > IoUringCmd and issue_flags passed to uring_cmd()? > > Agreed. We could introduce a new type like below > > pub struct IoUringCmd<'a> { > cmd: Pin<&'a mut IoUringCmdInner>, > issue_flags: IssueFlags, > } Yeah, that looks reasonable. > > Is `io_uring_cmd_done` should be called in iopoll callback? If so, it's > better to make new type for iopoll and move this function to the type. I'm not too familiar with IOPOLL. As far as I'm aware, only the NVMe passthru uring_cmd() implementation supports it? It looks like commit 9ce6c9875f3e ("nvme: always punt polled uring_cmd end_io work to task_work") removed the IOPOLL special case, so all NVMe passthru completions go through nvme_uring_task_cb(), which calls io_uring_cmd_done(). > > > > > > + let ret = from_result(|| ret) as isize; > > > + // SAFETY: The call guarantees that `self.inner` is not dangling and stays valid > > > + unsafe { > > > + bindings::io_uring_cmd_done(self.inner.get(), ret, res2, issue_flags); > > > + } > > > + } > > > +} > > > + > > > +/// A Rust abstraction for the Linux kernel's `io_uring_sqe` structure. > > > +/// > > > +/// This structure is a safe, opaque wrapper around the raw C [`io_uring_sqe`](srctree/include/uapi/linux/io_uring.h) > > > +/// binding from the Linux kernel. It represents a Submission Queue Entry > > > +/// used in io_uring operations within the kernel. > > > +/// > > > +/// # Type Safety > > > +/// > > > +/// The `#[repr(transparent)]` attribute ensures that this wrapper has > > > +/// the same memory layout as the underlying `io_uring_sqe` structure, > > > +/// allowing it to be safely transmuted between the two representations. > > > +/// > > > +/// # Fields > > > +/// > > > +/// * `inner` - An opaque wrapper containing the actual `io_uring_sqe` data. > > > +/// The `Opaque` type prevents direct access to the internal > > > +/// structure fields, ensuring memory safety and encapsulation. > > > +/// > > > +/// # Usage > > > +/// > > > +/// This type represents a submission queue entry that describes an I/O > > > +/// operation to be executed by the io_uring subsystem. It contains > > > +/// information such as the operation type, file descriptor, buffer > > > +/// pointers, and other operation-specific data. > > > +/// > > > +/// Users can obtain this type from [`IoUringCmd::sqe()`] method, which > > > +/// extracts the submission queue entry associated with a command. > > > +/// > > > +/// This type should not be constructed or manipulated directly by > > > +/// kernel module developers. > > > +/// > > > +/// # INVARIANT > > > +/// - `self.inner` always points to a valid, live `bindings::io_uring_sqe`. > > > +#[repr(transparent)] > > > +pub struct IoUringSqe { > > > + inner: Opaque<bindings::io_uring_sqe>, > > > +} > > > + > > > +impl IoUringSqe { > > > + /// Reads and interprets the `cmd` field of an `bindings::io_uring_sqe` as a value of type `T`. > > > + /// > > > + /// # Safety & Invariants > > > + /// - Construction of `T` is delegated to `FromBytes`, which guarantees that `T` has no > > > + /// invalid bit patterns and can be safely reconstructed from raw bytes. > > > + /// - **Limitation:** This implementation does not support `IORING_SETUP_SQE128` (larger SQE entries). > > > + /// Only the standard `io_uring_sqe` layout is handled here. > > > + /// > > > + /// # Errors > > > + /// * Returns `EINVAL` if the `self` does not hold a `opcode::URING_CMD`. > > > + /// * Returns `EFAULT` if the command buffer is smaller than the requested type `T`. > > > + /// > > > + /// # Returns > > > + /// * On success, returns a `T` deserialized from the `cmd`. > > > + /// * On failure, returns an appropriate error as described above. > > > + pub fn cmd_data<T: FromBytes>(&self) -> Result<T> { > > > + // SAFETY: `self.inner` guaranteed by the type invariant to point > > > + // to a live `io_uring_sqe`, so dereferencing is safe. > > > + let sqe = unsafe { &*self.inner.get() }; > > > + > > > + if u32::from(sqe.opcode) != opcode::URING_CMD { > > > > io_uring opcodes are u8 values. Can the URING_CMD constant be made a > > u8 instead of converting sqe.opcode here? > > > > The read of the opcode should also be using read_volatile(), as it may > > lie in the userspace-mapped SQE region, which could be concurrently > > written by another userspace thread. That would probably be buggy > > behavior on the userspace side, but it can cause undefined behavior on > > the kernel side without a volatile read, as the compiler could choose > > to re-read the value multiple times assuming it will get the same > > value each time. > > Thanks, opcode should be read with read_volatile(). And I would introduce new type > for opcode. I would rather remove the opcode check entirely. As mentioned below, it's not required for safety, it doesn't actually ensure that userspace has written to the cmd field, and it adds overhead in a hot path. Best, Caleb > > > > > > + return Err(EINVAL); > > > + } > > > + > > > + // SAFETY: Accessing the `sqe.cmd` union field is safe because we've > > > + // verified that `sqe.opcode == IORING_OP_URING_CMD`, which guarantees > > > + // that this union variant is initialized and valid. > > > > The opcode check isn't necessary. It doesn't provide any assurances > > that this variant of the union is actually initialized, since a buggy > > userspace process could set the opcode without initializing the cmd > > field. It's always valid to access this memory since it's part of the > > SQE region created at io_uring setup time. So I would drop the opcode > > check. > > > > > + let cmd = unsafe { sqe.__bindgen_anon_6.cmd.as_ref() }; > > > + let cmd_len = size_of_val(&sqe.__bindgen_anon_6.bindgen_union_field); > > > + > > > + if cmd_len < size_of::<T>() { > > > + return Err(EFAULT); > > > + } > > > + > > > + let cmd_ptr = cmd.as_ptr() as *mut T; > > > + > > > + // SAFETY: `cmd_ptr` is valid from `self.inner` which is guaranteed by > > > + // type variant. And also it points to initialized `T` from userspace. > > > + let ret = unsafe { core::ptr::read_unaligned(cmd_ptr) }; > > > > Similarly, needs to be volatile. The C uring_cmd() implementations use > > READ_ONCE() to read the cmd field. > > Okay, This should use read_volatile too. > > Thanks, > Sidong > > > > Best, > > Caleb > > > > > + > > > + Ok(ret) > > > + } > > > + > > > + /// Constructs a new `IoUringSqe` from a raw `io_uring_sqe`. > > > + /// > > > + /// # Safety > > > + /// > > > + /// The caller must guarantee that: > > > + /// - `ptr` is non-null, properly aligned, and points to a valid initialized > > > + /// `bindings::io_uring_sqe`. > > > + /// - The pointed-to memory remains valid (not freed or repurposed) for the > > > + /// entire lifetime `'a` of the returned reference. > > > + /// - **Aliasing rules (for `&T`):** while the returned `&'a IoUringSqe` is > > > + /// alive, there must be **no mutable access** to the same object through any > > > + /// path (no `&mut`, no raw-pointer writes, no FFI/IRQ/other-CPU writers). > > > + /// Multiple `&` is fine **only if all of them are read-only** for the entire > > > + /// overlapping lifetime. > > > + /// - This relies on `IoUringSqe` being `repr(transparent)` over > > > + /// `bindings::io_uring_sqe`, so the cast preserves layout. > > > + #[inline] > > > + pub unsafe fn from_raw<'a>(ptr: *const bindings::io_uring_sqe) -> &'a IoUringSqe { > > > + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the > > > + // duration of 'a. The cast is okay because `IoUringSqe` is `repr(transparent)` and has the > > > + // same memory layout as `bindings::io_uring_sqe`. > > > + unsafe { &*ptr.cast() } > > > + } > > > +} > > > diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs > > > index ed53169e795c..d38cf7137401 100644 > > > --- a/rust/kernel/lib.rs > > > +++ b/rust/kernel/lib.rs > > > @@ -91,6 +91,7 @@ > > > pub mod fs; > > > pub mod init; > > > pub mod io; > > > +pub mod io_uring; > > > pub mod ioctl; > > > pub mod jump_label; > > > #[cfg(CONFIG_KUNIT)] > > > -- > > > 2.43.0 > > >
On Fri, Aug 22, 2025 at 12:55:53PM +0000, Sidong Yang wrote: > Implment the io-uring abstractions needed for miscdevicecs and other > char devices that have io-uring command interface. > > * `io_uring::IoUringCmd` : Rust abstraction for `io_uring_cmd` which > will be used as arg for `MiscDevice::uring_cmd()`. And driver can get > `cmd_op` sent from userspace. Also it has `flags` which includes option > that is reissued. > > * `io_uring::IoUringSqe` : Rust abstraction for `io_uring_sqe` which > could be get from `IoUringCmd::sqe()` and driver could get `cmd_data` > from userspace. Also `IoUringSqe` has more data like opcode could be used in > driver. > > Signed-off-by: Sidong Yang <sidong.yang@furiosa.ai> > --- > rust/kernel/io_uring.rs | 306 ++++++++++++++++++++++++++++++++++++++++ > rust/kernel/lib.rs | 1 + > 2 files changed, 307 insertions(+) > create mode 100644 rust/kernel/io_uring.rs > > diff --git a/rust/kernel/io_uring.rs b/rust/kernel/io_uring.rs > new file mode 100644 > index 000000000000..61e88bdf4e42 > --- /dev/null > +++ b/rust/kernel/io_uring.rs > @@ -0,0 +1,306 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// SPDX-FileCopyrightText: (C) 2025 Furiosa AI > + > +//! Abstractions for io-uring. > +//! > +//! This module provides types for implements io-uring interface for char device. > +//! > +//! > +//! C headers: [`include/linux/io_uring/cmd.h`](srctree/include/linux/io_uring/cmd.h) and > +//! [`include/linux/io_uring/io_uring.h`](srctree/include/linux/io_uring/io_uring.h) > + > +use core::{mem::MaybeUninit, pin::Pin}; > + > +use crate::error::from_result; > +use crate::transmute::{AsBytes, FromBytes}; > +use crate::{fs::File, types::Opaque}; > + > +use crate::prelude::*; > + > +/// io-uring opcode > +pub mod opcode { > + /// opcode for uring cmd > + pub const URING_CMD: u32 = bindings::io_uring_op_IORING_OP_URING_CMD; > +} > + > +/// A Rust abstraction for the Linux kernel's `io_uring_cmd` structure. > +/// > +/// This structure is a safe, opaque wrapper around the raw C `io_uring_cmd` > +/// binding from the Linux kernel. It represents a command structure used > +/// in io_uring operations within the kernel. > +/// This type is used internally by the io_uring subsystem to manage > +/// asynchronous I/O commands. > +/// > +/// This type should not be constructed or manipulated directly by > +/// kernel module developers. > +/// > +/// # INVARIANT > +/// - `self.inner` always points to a valid, live `bindings::io_uring_cmd`. > +#[repr(transparent)] > +pub struct IoUringCmd { > + /// An opaque wrapper containing the actual `io_uring_cmd` data. > + inner: Opaque<bindings::io_uring_cmd>, > +} > + > +impl IoUringCmd { > + /// Returns the cmd_op with associated with the `io_uring_cmd`. > + #[inline] > + pub fn cmd_op(&self) -> u32 { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + unsafe { (*self.inner.get()).cmd_op } > + } > + > + /// Returns the flags with associated with the `io_uring_cmd`. > + #[inline] > + pub fn flags(&self) -> u32 { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + unsafe { (*self.inner.get()).flags } > + } > + > + /// Reads protocol data unit as `T` that impl `FromBytes` from uring cmd > + /// > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > + #[inline] > + pub fn read_pdu<T: FromBytes>(&self) -> Result<T> { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + let inner = unsafe { &mut *self.inner.get() }; > + > + let len = size_of::<T>(); > + if len > inner.pdu.len() { > + return Err(EFAULT); > + } > + > + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); > + let ptr = &raw mut inner.pdu as *const c_void; > + > + // SAFETY: > + // * The `ptr` is valid pointer from `self.inner` that is guaranteed by type invariant. > + // * The `out` is valid pointer that points `T` which impls `FromBytes` and checked > + // size of `T` is smaller than pdu size. > + unsafe { > + core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::<c_void>(), len); > + } > + > + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements > + // `FromBytes`, any bit-pattern is a valid value for this type. > + Ok(unsafe { out.assume_init() }) > + } > + > + /// Writes the provided `value` to `pdu` in uring_cmd `self` > + /// > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > + #[inline] > + pub fn write_pdu<T: AsBytes>(&mut self, value: &T) -> Result<()> { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + let inner = unsafe { &mut *self.inner.get() }; > + > + let len = size_of::<T>(); > + if len > inner.pdu.len() { > + return Err(EFAULT); > + } > + > + let src = (value as *const T).cast::<c_void>(); > + let dst = &raw mut inner.pdu as *mut c_void; > + > + // SAFETY: > + // * The `src` is points valid memory that is guaranteed by `T` impls `AsBytes` > + // * The `dst` is valid. It's from `self.inner` that is guaranteed by type invariant. > + // * It's safe to copy because size of `T` is no more than len of pdu. > + unsafe { > + core::ptr::copy_nonoverlapping(src, dst, len); > + } > + > + Ok(()) > + } pdu is part of IoUringCmd, which is live in the whole uring_cmd lifetime. But both read_pdu()/write_pdu() needs copy to read or write any byte in the pdu, which is slow and hard to use, it could be more efficient to add two methods to return Result<&T> and Result<mut &T> for user to manipulate uring_cmd's pdu. Thanks, Ming
On Thu Aug 28, 2025 at 2:36 AM CEST, Ming Lei wrote: > On Fri, Aug 22, 2025 at 12:55:53PM +0000, Sidong Yang wrote: >> + /// Reads protocol data unit as `T` that impl `FromBytes` from uring cmd >> + /// >> + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. >> + #[inline] >> + pub fn read_pdu<T: FromBytes>(&self) -> Result<T> { >> + // SAFETY: `self.inner` is guaranteed by the type invariant to point >> + // to a live `io_uring_cmd`, so dereferencing is safe. >> + let inner = unsafe { &mut *self.inner.get() }; >> + >> + let len = size_of::<T>(); >> + if len > inner.pdu.len() { >> + return Err(EFAULT); >> + } >> + >> + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); >> + let ptr = &raw mut inner.pdu as *const c_void; >> + >> + // SAFETY: >> + // * The `ptr` is valid pointer from `self.inner` that is guaranteed by type invariant. >> + // * The `out` is valid pointer that points `T` which impls `FromBytes` and checked >> + // size of `T` is smaller than pdu size. >> + unsafe { >> + core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::<c_void>(), len); >> + } >> + >> + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements >> + // `FromBytes`, any bit-pattern is a valid value for this type. >> + Ok(unsafe { out.assume_init() }) >> + } >> + >> + /// Writes the provided `value` to `pdu` in uring_cmd `self` >> + /// >> + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. >> + #[inline] >> + pub fn write_pdu<T: AsBytes>(&mut self, value: &T) -> Result<()> { >> + // SAFETY: `self.inner` is guaranteed by the type invariant to point >> + // to a live `io_uring_cmd`, so dereferencing is safe. >> + let inner = unsafe { &mut *self.inner.get() }; >> + >> + let len = size_of::<T>(); >> + if len > inner.pdu.len() { >> + return Err(EFAULT); >> + } >> + >> + let src = (value as *const T).cast::<c_void>(); >> + let dst = &raw mut inner.pdu as *mut c_void; >> + >> + // SAFETY: >> + // * The `src` is points valid memory that is guaranteed by `T` impls `AsBytes` >> + // * The `dst` is valid. It's from `self.inner` that is guaranteed by type invariant. >> + // * It's safe to copy because size of `T` is no more than len of pdu. >> + unsafe { >> + core::ptr::copy_nonoverlapping(src, dst, len); >> + } >> + >> + Ok(()) >> + } > > pdu is part of IoUringCmd, which is live in the whole uring_cmd lifetime. But > both read_pdu()/write_pdu() needs copy to read or write any byte in the pdu, which > is slow and hard to use, it could be more efficient to add two methods to return > Result<&T> and Result<mut &T> for user to manipulate uring_cmd's pdu. That can also be useful, but you do need to ensure that the pdu is aligned to at least the required alignment of `T` for this to be sound. I also don't follow your argument that reading & writing the pdu is slow. --- Cheers, Benno
On Thu, Aug 28, 2025 at 09:25:56AM +0200, Benno Lossin wrote: > On Thu Aug 28, 2025 at 2:36 AM CEST, Ming Lei wrote: > > On Fri, Aug 22, 2025 at 12:55:53PM +0000, Sidong Yang wrote: > >> + /// Reads protocol data unit as `T` that impl `FromBytes` from uring cmd > >> + /// > >> + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > >> + #[inline] > >> + pub fn read_pdu<T: FromBytes>(&self) -> Result<T> { > >> + // SAFETY: `self.inner` is guaranteed by the type invariant to point > >> + // to a live `io_uring_cmd`, so dereferencing is safe. > >> + let inner = unsafe { &mut *self.inner.get() }; > >> + > >> + let len = size_of::<T>(); > >> + if len > inner.pdu.len() { > >> + return Err(EFAULT); > >> + } > >> + > >> + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); > >> + let ptr = &raw mut inner.pdu as *const c_void; > >> + > >> + // SAFETY: > >> + // * The `ptr` is valid pointer from `self.inner` that is guaranteed by type invariant. > >> + // * The `out` is valid pointer that points `T` which impls `FromBytes` and checked > >> + // size of `T` is smaller than pdu size. > >> + unsafe { > >> + core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::<c_void>(), len); > >> + } > >> + > >> + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements > >> + // `FromBytes`, any bit-pattern is a valid value for this type. > >> + Ok(unsafe { out.assume_init() }) > >> + } > >> + > >> + /// Writes the provided `value` to `pdu` in uring_cmd `self` > >> + /// > >> + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > >> + #[inline] > >> + pub fn write_pdu<T: AsBytes>(&mut self, value: &T) -> Result<()> { > >> + // SAFETY: `self.inner` is guaranteed by the type invariant to point > >> + // to a live `io_uring_cmd`, so dereferencing is safe. > >> + let inner = unsafe { &mut *self.inner.get() }; > >> + > >> + let len = size_of::<T>(); > >> + if len > inner.pdu.len() { > >> + return Err(EFAULT); > >> + } > >> + > >> + let src = (value as *const T).cast::<c_void>(); > >> + let dst = &raw mut inner.pdu as *mut c_void; > >> + > >> + // SAFETY: > >> + // * The `src` is points valid memory that is guaranteed by `T` impls `AsBytes` > >> + // * The `dst` is valid. It's from `self.inner` that is guaranteed by type invariant. > >> + // * It's safe to copy because size of `T` is no more than len of pdu. > >> + unsafe { > >> + core::ptr::copy_nonoverlapping(src, dst, len); > >> + } > >> + > >> + Ok(()) > >> + } > > > > pdu is part of IoUringCmd, which is live in the whole uring_cmd lifetime. But > > both read_pdu()/write_pdu() needs copy to read or write any byte in the pdu, which > > is slow and hard to use, it could be more efficient to add two methods to return > > Result<&T> and Result<mut &T> for user to manipulate uring_cmd's pdu. > > That can also be useful, but you do need to ensure that the pdu is > aligned to at least the required alignment of `T` for this to be sound. Yes, `IoUringCmd` is actually one C struct, so `T` is supposed to be `#[repr(C)]` too, and the usage should be just like what io_uring_cmd_to_pdu() provides. > I also don't follow your argument that reading & writing the pdu is > slow. Please look at how pdu is used in existing C users, such as, the tag field of `pdu` can be read/write directly via: `io_uring_cmd_to_pdu(ioucmd, struct ublk_uring_cmd_pdu)->tag` but read_pdu()/write_pdu() needs whole 32bytes copy for read/write any single field. User may only need to store single byte data in pdu... Thanks, Ming
Hi Sidong, > On 22 Aug 2025, at 09:55, Sidong Yang <sidong.yang@furiosa.ai> wrote: > > Implment the io-uring abstractions needed for miscdevicecs and other > char devices that have io-uring command interface. Can you expand on this last part? > > * `io_uring::IoUringCmd` : Rust abstraction for `io_uring_cmd` which > will be used as arg for `MiscDevice::uring_cmd()`. And driver can get > `cmd_op` sent from userspace. Also it has `flags` which includes option > that is reissued. > This is a bit hard to parse. > * `io_uring::IoUringSqe` : Rust abstraction for `io_uring_sqe` which > could be get from `IoUringCmd::sqe()` and driver could get `cmd_data` > from userspace. Also `IoUringSqe` has more data like opcode could be used in > driver. Same here. > > Signed-off-by: Sidong Yang <sidong.yang@furiosa.ai> > --- > rust/kernel/io_uring.rs | 306 ++++++++++++++++++++++++++++++++++++++++ > rust/kernel/lib.rs | 1 + > 2 files changed, 307 insertions(+) > create mode 100644 rust/kernel/io_uring.rs > > diff --git a/rust/kernel/io_uring.rs b/rust/kernel/io_uring.rs > new file mode 100644 > index 000000000000..61e88bdf4e42 > --- /dev/null > +++ b/rust/kernel/io_uring.rs > @@ -0,0 +1,306 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// SPDX-FileCopyrightText: (C) 2025 Furiosa AI > + > +//! Abstractions for io-uring. > +//! > +//! This module provides types for implements io-uring interface for char device. This is also hard to parse. > +//! > +//! > +//! C headers: [`include/linux/io_uring/cmd.h`](srctree/include/linux/io_uring/cmd.h) and > +//! [`include/linux/io_uring/io_uring.h`](srctree/include/linux/io_uring/io_uring.h) > + > +use core::{mem::MaybeUninit, pin::Pin}; > + > +use crate::error::from_result; > +use crate::transmute::{AsBytes, FromBytes}; > +use crate::{fs::File, types::Opaque}; > + > +use crate::prelude::*; > + > +/// io-uring opcode /// `IoUring` opcodes. Notice: a) The capitalization, b) The use of backticks, c) The period in the end. This is an ongoing effort to keep the docs tidy :) > +pub mod opcode { > + /// opcode for uring cmd > + pub const URING_CMD: u32 = bindings::io_uring_op_IORING_OP_URING_CMD; > +} Should this be its own type? This way we can use the type system to enforce that only valid opcodes are used where an opcode is expected. > + > +/// A Rust abstraction for the Linux kernel's `io_uring_cmd` structure. /// A Rust abstraction for `io_uring_cmd`. > +/// > +/// This structure is a safe, opaque wrapper around the raw C `io_uring_cmd` > +/// binding from the Linux kernel. It represents a command structure used > +/// in io_uring operations within the kernel. This code will also be part of the kernel, so mentioning “the Linux kernel” is superfluous. > +/// This type is used internally by the io_uring subsystem to manage > +/// asynchronous I/O commands. > +/// > +/// This type should not be constructed or manipulated directly by > +/// kernel module developers. “…by drivers”. > +/// > +/// # INVARIANT /// # Invariants > +/// - `self.inner` always points to a valid, live `bindings::io_uring_cmd`. Blank here > +#[repr(transparent)] > +pub struct IoUringCmd { > + /// An opaque wrapper containing the actual `io_uring_cmd` data. > + inner: Opaque<bindings::io_uring_cmd>, > +} > + > +impl IoUringCmd { > + /// Returns the cmd_op with associated with the `io_uring_cmd`. This sentence does not parse very well. > + #[inline] > + pub fn cmd_op(&self) -> u32 { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + unsafe { (*self.inner.get()).cmd_op } Perhaps add an as_raw() method so this becomes: unsafe {*self.as_raw()}.cmd_op > + } > + > + /// Returns the flags with associated with the `io_uring_cmd`. With the command, or something like that. The user doesn’t see the raw bindings::io_uring_cmd so we shouldn’t be mentioning it if we can help it. > + #[inline] > + pub fn flags(&self) -> u32 { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + unsafe { (*self.inner.get()).flags } > + } > + > + /// Reads protocol data unit as `T` that impl `FromBytes` from uring cmd This sentence does not parse very well. > + /// > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. /// # Errors > + #[inline] > + pub fn read_pdu<T: FromBytes>(&self) -> Result<T> { This takes &self, > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + let inner = unsafe { &mut *self.inner.get() }; But this creates a &mut to self.inner using unsafe code. Avoid doing this in general. All of a sudden your type is not thread-safe anymore. If you need to mutate &self here, then take &mut self as an argument. > + > + let len = size_of::<T>(); > + if len > inner.pdu.len() { > + return Err(EFAULT); EFAULT? How about EINVAL? > + } > + > + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); > + let ptr = &raw mut inner.pdu as *const c_void; > + > + // SAFETY: > + // * The `ptr` is valid pointer from `self.inner` that is guaranteed by type invariant. > + // * The `out` is valid pointer that points `T` which impls `FromBytes` and checked > + // size of `T` is smaller than pdu size. > + unsafe { > + core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::<c_void>(), len); I don’t think you need to manually specify c_void here. Benno, can’t we use core::mem::zeroed() or something like that to avoid this unsafe? The input was zeroed in prep() and the output can just be a zeroed T on the stack, unless I missed something? > + } > + > + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements > + // `FromBytes`, any bit-pattern is a valid value for this type. > + Ok(unsafe { out.assume_init() }) > + } > + > + /// Writes the provided `value` to `pdu` in uring_cmd `self` Writes the provided `value` to `pdu`. > + /// /// # Errors /// > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > + #[inline] > + pub fn write_pdu<T: AsBytes>(&mut self, value: &T) -> Result<()> { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + let inner = unsafe { &mut *self.inner.get() }; > + > + let len = size_of::<T>(); > + if len > inner.pdu.len() { > + return Err(EFAULT); > + } > + > + let src = (value as *const T).cast::<c_void>(); as_ptr().cast() > + let dst = &raw mut inner.pdu as *mut c_void; (&raw mut inner.pdu).cast() > + > + // SAFETY: > + // * The `src` is points valid memory that is guaranteed by `T` impls `AsBytes` > + // * The `dst` is valid. It's from `self.inner` that is guaranteed by type invariant. > + // * It's safe to copy because size of `T` is no more than len of pdu. > + unsafe { > + core::ptr::copy_nonoverlapping(src, dst, len); > + } > + > + Ok(()) > + } > + > + /// Constructs a new [`IoUringCmd`] from a raw `io_uring_cmd` Missing period. > + /// > + /// # Safety > + /// > + /// The caller must guarantee that: > + /// - `ptr` is non-null, properly aligned, and points to a valid > + /// `bindings::io_uring_cmd`. Blanks for every bullet point, please. > + /// - The pointed-to memory remains initialized and valid for the entire > + /// lifetime `'a` of the returned reference. > + /// - While the returned `Pin<&'a mut IoUringCmd>` is alive, the underlying > + /// object is **not moved** (pinning requirement). They can’t move an !Unpin type in safe code. > + /// - **Aliasing rules:** the returned `&mut` has **exclusive** access to the same > + /// object for its entire lifetime: You really don’t need to emphasize these. > + /// - No other `&mut` **or** `&` references to the same `io_uring_cmd` may be > + /// alive at the same time. This and the point above are identical. > + /// - There must be no concurrent reads/writes through raw pointers, FFI, or > + /// other kernel paths to the same object during this lifetime. This and the first point say the same thing. > + /// - If the object can be touched from other contexts (e.g. IRQ/another CPU), > + /// the caller must provide synchronization to uphold this exclusivity. I am not sure what you mean. > + /// - This function relies on `IoUringCmd` being `repr(transparent)` over > + /// `bindings::io_uring_cmd` so the cast preserves layout. This is not a safety requirement. Just adapt the requirements from other instances of from_raw(), they all convert a *mut T to a &T so the safety requirements are similar. > + #[inline] > + pub unsafe fn from_raw<'a>(ptr: *mut bindings::io_uring_cmd) -> Pin<&'a mut IoUringCmd> { Why is this pub? Sounds like a massive footgun? This should be private or at best pub(crate). > + // SAFETY: > + // * The caller guarantees that the pointer is not dangling and stays > + // valid for the duration of 'a. > + // * The cast is okay because `IoUringCmd` is `repr(transparent)` and > + // has the same memory layout as `bindings::io_uring_cmd`. > + // * The returned `Pin` ensures that the object cannot be moved, which > + // is required because the kernel may hold pointers to this memory > + // location and moving it would invalidate those pointers. > + unsafe { Pin::new_unchecked(&mut *ptr.cast()) } > + } > + > + /// Returns the file that referenced by uring cmd self. > + #[inline] > + pub fn file(&self) -> &File { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + let file = unsafe { (*self.inner.get()).file }; > + > + // SAFETY: > + // * The `file` points valid file. Why? > + // * refcount is positive after submission queue entry issued. > + // * There is no active fdget_pos region on the file on this thread. > + unsafe { File::from_raw_file(file) } > + } > + > + /// Returns an reference to the [`IoUringSqe`] associated with this command. s/an/a > + #[inline] > + pub fn sqe(&self) -> &IoUringSqe { > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > + // to a live `io_uring_cmd`, so dereferencing is safe. > + let sqe = unsafe { (*self.inner.get()).sqe }; > + // SAFETY: The call guarantees that the `sqe` points valid io_uring_sqe. What do you mean by “the call guarantees” ? > + unsafe { IoUringSqe::from_raw(sqe) } > + } > + > + /// Completes an this [`IoUringCmd`] request that was previously queued. This sentence does not parse very well. > + /// > + /// # Safety > + /// > + /// - This function must be called **only** for a command whose `uring_cmd` Please no emphasis. > + /// handler previously returned **`-EIOCBQUEUED`** to io_uring. To what? Are you referring to a Rust type, or to the C part of the kernel? > + /// > + /// # Parameters > + /// > + /// - `ret`: Result to return to userspace. > + /// - `res2`: Extra for big completion queue entry `IORING_SETUP_CQE32`. This sentence does not parse very well. Also, can you rename this? > + /// - `issue_flags`: Flags associated with this request, typically the same > + /// as those passed to the `uring_cmd` handler. > + #[inline] > + pub fn done(self: Pin<&mut IoUringCmd>, ret: Result<i32>, res2: u64, issue_flags: u32) { > + let ret = from_result(|| ret) as isize; What does this do? > + // SAFETY: The call guarantees that `self.inner` is not dangling and stays valid What do you mean “the call” ? > + unsafe { > + bindings::io_uring_cmd_done(self.inner.get(), ret, res2, issue_flags); > + } > + } > +} > + > +/// A Rust abstraction for the Linux kernel's `io_uring_sqe` structure. Please don’t mention the words “Linux kernel” here either. > +/// > +/// This structure is a safe, opaque wrapper around the raw C [`io_uring_sqe`](srctree/include/uapi/linux/io_uring.h) This line needs to be wrapped. > +/// binding from the Linux kernel. It represents a Submission Queue Entry Can you link somewhere here? Perhaps there’s docs for “Submission Queue Entry”. > +/// used in io_uring operations within the kernel. > +/// > +/// # Type Safety > +/// > +/// The `#[repr(transparent)]` attribute ensures that this wrapper has > +/// the same memory layout as the underlying `io_uring_sqe` structure, > +/// allowing it to be safely transmuted between the two representations. This is an invariant, please move it there. > +/// > +/// # Fields > +/// > +/// * `inner` - An opaque wrapper containing the actual `io_uring_sqe` data. > +/// The `Opaque` type prevents direct access to the internal > +/// structure fields, ensuring memory safety and encapsulation. Inline docs please. > +/// > +/// # Usage I don’t think we specifically need to mention “# Usage”. > +/// > +/// This type represents a submission queue entry that describes an I/O You can start with “Represents a…”. No need to say “this type” here. > +/// operation to be executed by the io_uring subsystem. It contains > +/// information such as the operation type, file descriptor, buffer > +/// pointers, and other operation-specific data. > +/// > +/// Users can obtain this type from [`IoUringCmd::sqe()`] method, which > +/// extracts the submission queue entry associated with a command. > +/// > +/// This type should not be constructed or manipulated directly by > +/// kernel module developers. By drivers. > +/// > +/// # INVARIANT /// # Invariants > +/// - `self.inner` always points to a valid, live `bindings::io_uring_sqe`. > +#[repr(transparent)] > +pub struct IoUringSqe { > + inner: Opaque<bindings::io_uring_sqe>, > +} > + > +impl IoUringSqe { > + /// Reads and interprets the `cmd` field of an `bindings::io_uring_sqe` as a value of type `T`. > + /// > + /// # Safety & Invariants Safety section for a safe function. > + /// - Construction of `T` is delegated to `FromBytes`, which guarantees that `T` has no > + /// invalid bit patterns and can be safely reconstructed from raw bytes. > + /// - **Limitation:** This implementation does not support `IORING_SETUP_SQE128` (larger SQE entries). Please no emphasis. > + /// Only the standard `io_uring_sqe` layout is handled here. > + /// > + /// # Errors Blank here. > + /// * Returns `EINVAL` if the `self` does not hold a `opcode::URING_CMD`. > + /// * Returns `EFAULT` if the command buffer is smaller than the requested type `T`. > + /// > + /// # Returns I don’t think we need a specific section for this. Just write this in normal prose please. > + /// * On success, returns a `T` deserialized from the `cmd`. > + /// * On failure, returns an appropriate error as described above. > + pub fn cmd_data<T: FromBytes>(&self) -> Result<T> { > + // SAFETY: `self.inner` guaranteed by the type invariant to point > + // to a live `io_uring_sqe`, so dereferencing is safe. > + let sqe = unsafe { &*self.inner.get() }; > + > + if u32::from(sqe.opcode) != opcode::URING_CMD { > + return Err(EINVAL); > + } > + > + // SAFETY: Accessing the `sqe.cmd` union field is safe because we've > + // verified that `sqe.opcode == IORING_OP_URING_CMD`, which guarantees > + // that this union variant is initialized and valid. > + let cmd = unsafe { sqe.__bindgen_anon_6.cmd.as_ref() }; > + let cmd_len = size_of_val(&sqe.__bindgen_anon_6.bindgen_union_field); > + > + if cmd_len < size_of::<T>() { > + return Err(EFAULT); EINVAL > + } > + > + let cmd_ptr = cmd.as_ptr() as *mut T; cast() > + > + // SAFETY: `cmd_ptr` is valid from `self.inner` which is guaranteed by > + // type variant. And also it points to initialized `T` from userspace. “Invariant”. “[…] an initialized T”. > + let ret = unsafe { core::ptr::read_unaligned(cmd_ptr) }; > + > + Ok(ret) > + } > + > + /// Constructs a new `IoUringSqe` from a raw `io_uring_sqe`. [`IoUringSqe`]. Please build the docs and make sure all your docs look nice. > + /// > + /// # Safety > + /// > + /// The caller must guarantee that: > + /// - `ptr` is non-null, properly aligned, and points to a valid initialized > + /// `bindings::io_uring_sqe`. > + /// - The pointed-to memory remains valid (not freed or repurposed) for the > + /// entire lifetime `'a` of the returned reference. > + /// - **Aliasing rules (for `&T`):** while the returned `&'a IoUringSqe` is > + /// alive, there must be **no mutable access** to the same object through any > + /// path (no `&mut`, no raw-pointer writes, no FFI/IRQ/other-CPU writers). > + /// Multiple `&` is fine **only if all of them are read-only** for the entire > + /// overlapping lifetime. > + /// - This relies on `IoUringSqe` being `repr(transparent)` over > + /// `bindings::io_uring_sqe`, so the cast preserves layout. Please rewrite this entire section given the feedback I gave higher up in this patch. > + #[inline] > + pub unsafe fn from_raw<'a>(ptr: *const bindings::io_uring_sqe) -> &'a IoUringSqe { Private or pub(crate) at best. > + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the > + // duration of 'a. The cast is okay because `IoUringSqe` is `repr(transparent)` and has the > + // same memory layout as `bindings::io_uring_sqe`. > + unsafe { &*ptr.cast() } > + } > +} > diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs > index ed53169e795c..d38cf7137401 100644 > --- a/rust/kernel/lib.rs > +++ b/rust/kernel/lib.rs > @@ -91,6 +91,7 @@ > pub mod fs; > pub mod init; > pub mod io; > +pub mod io_uring; > pub mod ioctl; > pub mod jump_label; > #[cfg(CONFIG_KUNIT)] > -- > 2.43.0 >
On Wed, Aug 27, 2025 at 05:41:15PM -0300, Daniel Almeida wrote: Hi Daniel, > Hi Sidong, > > > On 22 Aug 2025, at 09:55, Sidong Yang <sidong.yang@furiosa.ai> wrote: > > > > Implment the io-uring abstractions needed for miscdevicecs and other > > char devices that have io-uring command interface. > > Can you expand on this last part? Sure. > > > > > * `io_uring::IoUringCmd` : Rust abstraction for `io_uring_cmd` which > > will be used as arg for `MiscDevice::uring_cmd()`. And driver can get > > `cmd_op` sent from userspace. Also it has `flags` which includes option > > that is reissued. > > > > This is a bit hard to parse. I'll fix this. > > > * `io_uring::IoUringSqe` : Rust abstraction for `io_uring_sqe` which > > could be get from `IoUringCmd::sqe()` and driver could get `cmd_data` > > from userspace. Also `IoUringSqe` has more data like opcode could be used in > > driver. > > Same here. Same here. > > > > > Signed-off-by: Sidong Yang <sidong.yang@furiosa.ai> > > --- > > rust/kernel/io_uring.rs | 306 ++++++++++++++++++++++++++++++++++++++++ > > rust/kernel/lib.rs | 1 + > > 2 files changed, 307 insertions(+) > > create mode 100644 rust/kernel/io_uring.rs > > > > diff --git a/rust/kernel/io_uring.rs b/rust/kernel/io_uring.rs > > new file mode 100644 > > index 000000000000..61e88bdf4e42 > > --- /dev/null > > +++ b/rust/kernel/io_uring.rs > > @@ -0,0 +1,306 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +// SPDX-FileCopyrightText: (C) 2025 Furiosa AI > > + > > +//! Abstractions for io-uring. > > +//! > > +//! This module provides types for implements io-uring interface for char device. > > This is also hard to parse. I'll fix this. > > > +//! > > +//! > > +//! C headers: [`include/linux/io_uring/cmd.h`](srctree/include/linux/io_uring/cmd.h) and > > +//! [`include/linux/io_uring/io_uring.h`](srctree/include/linux/io_uring/io_uring.h) > > + > > +use core::{mem::MaybeUninit, pin::Pin}; > > + > > +use crate::error::from_result; > > +use crate::transmute::{AsBytes, FromBytes}; > > +use crate::{fs::File, types::Opaque}; > > + > > +use crate::prelude::*; > > + > > +/// io-uring opcode > > /// `IoUring` opcodes. > > Notice: > > a) The capitalization, > b) The use of backticks, > c) The period in the end. > > This is an ongoing effort to keep the docs tidy :) Thanks :) > > > +pub mod opcode { > > + /// opcode for uring cmd > > + pub const URING_CMD: u32 = bindings::io_uring_op_IORING_OP_URING_CMD; > > +} > > Should this be its own type? This way we can use the type system to enforce > that only valid opcodes are used where an opcode is expected. Sure. How about a transparent struct like below. #[repr(transparent)] pub struct Opcode(u32); impl Opcode { pub const URING_CMD: Self = Self(bindings::io_uring_op_IORING_OP_URING_CMD as u32); } > > > + > > +/// A Rust abstraction for the Linux kernel's `io_uring_cmd` structure. > > /// A Rust abstraction for `io_uring_cmd`. Okay, I'll fixed all comments mentioning "Linux kernel". > > > +/// > > +/// This structure is a safe, opaque wrapper around the raw C `io_uring_cmd` > > +/// binding from the Linux kernel. It represents a command structure used > > +/// in io_uring operations within the kernel. > > This code will also be part of the kernel, so mentioning "the Linux kernel" is superfluous. Thanks. > > > +/// This type is used internally by the io_uring subsystem to manage > > +/// asynchronous I/O commands. > > +/// > > +/// This type should not be constructed or manipulated directly by > > +/// kernel module developers. > > "...by drivers". Thanks. > > > +/// > > +/// # INVARIANT > > /// # Invariants Thanks. > > > +/// - `self.inner` always points to a valid, live `bindings::io_uring_cmd`. > > Blank here Thanks. > > > +#[repr(transparent)] > > +pub struct IoUringCmd { > > + /// An opaque wrapper containing the actual `io_uring_cmd` data. > > + inner: Opaque<bindings::io_uring_cmd>, > > +} > > + > > +impl IoUringCmd { > > + /// Returns the cmd_op with associated with the `io_uring_cmd`. > > This sentence does not parse very well. I'll fix this. Like you said, it's better to not to mention the c structure. > > > + #[inline] > > + pub fn cmd_op(&self) -> u32 { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + unsafe { (*self.inner.get()).cmd_op } > > Perhaps add an as_raw() method so this becomes: > > unsafe {*self.as_raw()}.cmd_op Agreed. Also it would return the Opcode type than u32. > > > + } > > + > > + /// Returns the flags with associated with the `io_uring_cmd`. > > With the command, or something like that. The user doesn´t see the raw > bindings::io_uring_cmd so we shouldn´t be mentioning it if we can help it. Agreed. I'll try to fix this without mentioning the io_uring_cmd. > > > + #[inline] > > + pub fn flags(&self) -> u32 { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + unsafe { (*self.inner.get()).flags } > > + } > > + > > + /// Reads protocol data unit as `T` that impl `FromBytes` from uring cmd > > This sentence does not parse very well. I'll fix this. > > > + /// > > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > > /// # Errors Thanks. > > > + #[inline] > > + pub fn read_pdu<T: FromBytes>(&self) -> Result<T> { > > This takes &self, > > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + let inner = unsafe { &mut *self.inner.get() }; > > But this creates a &mut to self.inner using unsafe code. Avoid doing this in > general. All of a sudden your type is not thread-safe anymore. > > If you need to mutate &self here, then take &mut self as an argument. > > > + > > + let len = size_of::<T>(); > > + if len > inner.pdu.len() { > > + return Err(EFAULT); > > EFAULT? How about EINVAL? Good. > > > + } > > + > > + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); > > + let ptr = &raw mut inner.pdu as *const c_void; > > + > > + // SAFETY: > > + // * The `ptr` is valid pointer from `self.inner` that is guaranteed by type invariant. > > + // * The `out` is valid pointer that points `T` which impls `FromBytes` and checked > > + // size of `T` is smaller than pdu size. > > + unsafe { > > + core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::<c_void>(), len); > > I don´t think you need to manually specify c_void here. > > Benno, can´t we use core::mem::zeroed() or something like that to avoid this unsafe? > > The input was zeroed in prep() and the output can just be a zeroed T on the > stack, unless I missed something? > > > + } > > + > > + // SAFETY: The read above has initialized all bytes in `out`, and since `T` implements > > + // `FromBytes`, any bit-pattern is a valid value for this type. > > + Ok(unsafe { out.assume_init() }) > > + } > > + > > + /// Writes the provided `value` to `pdu` in uring_cmd `self` > > Writes the provided `value` to `pdu`. Thanks. > > > + /// > > /// # Errors > /// Thanks. > > > + /// Fails with [`EFAULT`] if size of `T` is bigger than pdu size. > > > + #[inline] > > + pub fn write_pdu<T: AsBytes>(&mut self, value: &T) -> Result<()> { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + let inner = unsafe { &mut *self.inner.get() }; > > + > > + let len = size_of::<T>(); > > + if len > inner.pdu.len() { > > + return Err(EFAULT); > > + } > > + > > + let src = (value as *const T).cast::<c_void>(); > > as_ptr().cast() > > > + let dst = &raw mut inner.pdu as *mut c_void; > > (&raw mut inner.pdu).cast() > Thanks. > > + > > + // SAFETY: > > + // * The `src` is points valid memory that is guaranteed by `T` impls `AsBytes` > > + // * The `dst` is valid. It's from `self.inner` that is guaranteed by type invariant. > > + // * It's safe to copy because size of `T` is no more than len of pdu. > > + unsafe { > > + core::ptr::copy_nonoverlapping(src, dst, len); > > + } > > + > > + Ok(()) > > + } > > + > > + /// Constructs a new [`IoUringCmd`] from a raw `io_uring_cmd` > > Missing period. Thanks. > > > + /// > > + /// # Safety > > + /// > > + /// The caller must guarantee that: > > + /// - `ptr` is non-null, properly aligned, and points to a valid > > + /// `bindings::io_uring_cmd`. > > Blanks for every bullet point, please. Thanks. > > > + /// - The pointed-to memory remains initialized and valid for the entire > > + /// lifetime `'a` of the returned reference. > > + /// - While the returned `Pin<&'a mut IoUringCmd>` is alive, the underlying > > + /// object is **not moved** (pinning requirement). > > They can´t move an !Unpin type in safe code. Okay, this could be removed. > > > + /// - **Aliasing rules:** the returned `&mut` has **exclusive** access to the same > > + /// object for its entire lifetime: > > You really don´t need to emphasize these. Okay. > > > + /// - No other `&mut` **or** `&` references to the same `io_uring_cmd` may be > > + /// alive at the same time. > > This and the point above are identical. It would be removed. > > > + /// - There must be no concurrent reads/writes through raw pointers, FFI, or > > + /// other kernel paths to the same object during this lifetime. > > This and the first point say the same thing. > > > + /// - If the object can be touched from other contexts (e.g. IRQ/another CPU), > > + /// the caller must provide synchronization to uphold this exclusivity. > > I am not sure what you mean. > > + /// - This function relies on `IoUringCmd` being `repr(transparent)` over > > + /// `bindings::io_uring_cmd` so the cast preserves layout. > > This is not a safety requirement. > > Just adapt the requirements from other instances of from_raw(), they all > convert a *mut T to a &T so the safety requirements are similar. > Okay, This safety comments are too redundant. I'll rewrite this. Thanks. > > + #[inline] > > + pub unsafe fn from_raw<'a>(ptr: *mut bindings::io_uring_cmd) -> Pin<&'a mut IoUringCmd> { > > Why is this pub? Sounds like a massive footgun? This should be private or at > best pub(crate). Because from_raw() would be used in miscdevice. pub(crate) will be okay. > > > > + // SAFETY: > > + // * The caller guarantees that the pointer is not dangling and stays > > + // valid for the duration of 'a. > > + // * The cast is okay because `IoUringCmd` is `repr(transparent)` and > > + // has the same memory layout as `bindings::io_uring_cmd`. > > + // * The returned `Pin` ensures that the object cannot be moved, which > > + // is required because the kernel may hold pointers to this memory > > + // location and moving it would invalidate those pointers. > > > + unsafe { Pin::new_unchecked(&mut *ptr.cast()) } > > + } > > + > > + /// Returns the file that referenced by uring cmd self. > > + #[inline] > > + pub fn file(&self) -> &File { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + let file = unsafe { (*self.inner.get()).file }; > > + > > + // SAFETY: > > + // * The `file` points valid file. > > Why? `file` is from `self.inner` which is guranteed by the type invariant. I missed the comment. > > > + // * refcount is positive after submission queue entry issued. > > + // * There is no active fdget_pos region on the file on this thread. > > + unsafe { File::from_raw_file(file) } > > + } > > + > > + /// Returns an reference to the [`IoUringSqe`] associated with this command. > > s/an/a Thanks. > > > + #[inline] > > + pub fn sqe(&self) -> &IoUringSqe { > > + // SAFETY: `self.inner` is guaranteed by the type invariant to point > > + // to a live `io_uring_cmd`, so dereferencing is safe. > > + let sqe = unsafe { (*self.inner.get()).sqe }; > > + // SAFETY: The call guarantees that the `sqe` points valid io_uring_sqe. > > What do you mean by "the call guarantees" ? It's just miss. This should be "This call is guaranteed to be safe because...". > > > + unsafe { IoUringSqe::from_raw(sqe) } > > + } > > + > > + /// Completes an this [`IoUringCmd`] request that was previously queued. > > This sentence does not parse very well. I'll fix this. Thanks. > > > + /// > > + /// # Safety > > + /// > > + /// - This function must be called **only** for a command whose `uring_cmd` > > Please no emphasis. Thanks. > > > + /// handler previously returned **`-EIOCBQUEUED`** to io_uring. > > To what? Are you referring to a Rust type, or to the C part of the kernel? I referred C type but it's better to use Rust return type. > > > + /// > > + /// # Parameters > > + /// > > + /// - `ret`: Result to return to userspace. > > + /// - `res2`: Extra for big completion queue entry `IORING_SETUP_CQE32`. > > This sentence does not parse very well. Also, can you rename this? Okay. > > > + /// - `issue_flags`: Flags associated with this request, typically the same > > + /// as those passed to the `uring_cmd` handler. > > + #[inline] > > + pub fn done(self: Pin<&mut IoUringCmd>, ret: Result<i32>, res2: u64, issue_flags: u32) { > > + let ret = from_result(|| ret) as isize; > > What does this do? It casts Result<i32> to isize. `bindings::io_uring_cmd_done` receives `isize`. I wanted that `ret` would be `Result` than just i32. `ret` should be just isize? > > > + // SAFETY: The call guarantees that `self.inner` is not dangling and stays valid > > What do you mean "the call" ? Sorry, "This call is guruanteed to be safe ..." > > > + unsafe { > > + bindings::io_uring_cmd_done(self.inner.get(), ret, res2, issue_flags); > > + } > > + } > > +} > > + > > +/// A Rust abstraction for the Linux kernel's `io_uring_sqe` structure. > > Please don´t mention the words "Linux kernel" here either. Yes. > > > +/// > > +/// This structure is a safe, opaque wrapper around the raw C [`io_uring_sqe`](srctree/include/uapi/linux/io_uring.h) > > This line needs to be wrapped. Okay. > > > +/// binding from the Linux kernel. It represents a Submission Queue Entry > > Can you link somewhere here? Perhaps there´s docs for "Submission Queue > Entry". Okay I'll find it. > > > +/// used in io_uring operations within the kernel. > > +/// > > +/// # Type Safety > > +/// > > +/// The `#[repr(transparent)]` attribute ensures that this wrapper has > > +/// the same memory layout as the underlying `io_uring_sqe` structure, > > +/// allowing it to be safely transmuted between the two representations. > > This is an invariant, please move it there. Thanks. > > > +/// > > +/// # Fields > > +/// > > +/// * `inner` - An opaque wrapper containing the actual `io_uring_sqe` data. > > +/// The `Opaque` type prevents direct access to the internal > > +/// structure fields, ensuring memory safety and encapsulation. > > Inline docs please. Okay. > > > +/// > > +/// # Usage > > I don´t think we specifically need to mention "# Usage". Okay it would be deleted. > > > +/// > > +/// This type represents a submission queue entry that describes an I/O > > You can start with "Represents a...". No need to say "this type" here. Thanks. > > > +/// operation to be executed by the io_uring subsystem. It contains > > +/// information such as the operation type, file descriptor, buffer > > +/// pointers, and other operation-specific data. > > +/// > > +/// Users can obtain this type from [`IoUringCmd::sqe()`] method, which > > +/// extracts the submission queue entry associated with a command. > > +/// > > +/// This type should not be constructed or manipulated directly by > > +/// kernel module developers. > > By drivers. Thanks. > > > +/// > > +/// # INVARIANT > > /// # Invariants Thanks. > > > +/// - `self.inner` always points to a valid, live `bindings::io_uring_sqe`. > > +#[repr(transparent)] > > +pub struct IoUringSqe { > > + inner: Opaque<bindings::io_uring_sqe>, > > +} > > + > > +impl IoUringSqe { > > + /// Reads and interprets the `cmd` field of an `bindings::io_uring_sqe` as a value of type `T`. > > + /// > > + /// # Safety & Invariants > > Safety section for a safe function. Thanks. > > > + /// - Construction of `T` is delegated to `FromBytes`, which guarantees that `T` has no > > + /// invalid bit patterns and can be safely reconstructed from raw bytes. > > + /// - **Limitation:** This implementation does not support `IORING_SETUP_SQE128` (larger SQE entries). > > Please no emphasis. Okay. > > > > + /// Only the standard `io_uring_sqe` layout is handled here. > > + /// > > + /// # Errors > > Blank here. Thanks. > > > + /// * Returns `EINVAL` if the `self` does not hold a `opcode::URING_CMD`. > > + /// * Returns `EFAULT` if the command buffer is smaller than the requested type `T`. > > + /// > > + /// # Returns > > I don´t think we need a specific section for this. Just write this in > normal prose please. Okay. > > > > + /// * On success, returns a `T` deserialized from the `cmd`. > > + /// * On failure, returns an appropriate error as described above. > > + pub fn cmd_data<T: FromBytes>(&self) -> Result<T> { > > + // SAFETY: `self.inner` guaranteed by the type invariant to point > > + // to a live `io_uring_sqe`, so dereferencing is safe. > > + let sqe = unsafe { &*self.inner.get() }; > > + > > + if u32::from(sqe.opcode) != opcode::URING_CMD { > > + return Err(EINVAL); > > + } > > + > > + // SAFETY: Accessing the `sqe.cmd` union field is safe because we've > > + // verified that `sqe.opcode == IORING_OP_URING_CMD`, which guarantees > > + // that this union variant is initialized and valid. > > + let cmd = unsafe { sqe.__bindgen_anon_6.cmd.as_ref() }; > > + let cmd_len = size_of_val(&sqe.__bindgen_anon_6.bindgen_union_field); > > + > > + if cmd_len < size_of::<T>() { > > + return Err(EFAULT); > > EINVAL Thanks. > > > + } > > + > > + let cmd_ptr = cmd.as_ptr() as *mut T; > > cast() Thanks. > > > + > > + // SAFETY: `cmd_ptr` is valid from `self.inner` which is guaranteed by > > + // type variant. And also it points to initialized `T` from userspace. > > "Invariant". Thanks! > > "[...] an initialized T". Thanks. > > > > + let ret = unsafe { core::ptr::read_unaligned(cmd_ptr) }; > > + > > + Ok(ret) > > + } > > + > > + /// Constructs a new `IoUringSqe` from a raw `io_uring_sqe`. > > [`IoUringSqe`]. > > Please build the docs and make sure all your docs look nice. Okay, I'll review comments by docs. > > > + /// > > + /// # Safety > > + /// > > + /// The caller must guarantee that: > > + /// - `ptr` is non-null, properly aligned, and points to a valid initialized > > + /// `bindings::io_uring_sqe`. > > + /// - The pointed-to memory remains valid (not freed or repurposed) for the > > + /// entire lifetime `'a` of the returned reference. > > + /// - **Aliasing rules (for `&T`):** while the returned `&'a IoUringSqe` is > > + /// alive, there must be **no mutable access** to the same object through any > > + /// path (no `&mut`, no raw-pointer writes, no FFI/IRQ/other-CPU writers). > > + /// Multiple `&` is fine **only if all of them are read-only** for the entire > > + /// overlapping lifetime. > > + /// - This relies on `IoUringSqe` being `repr(transparent)` over > > + /// `bindings::io_uring_sqe`, so the cast preserves layout. > > Please rewrite this entire section given the feedback I gave higher up in this > patch. Okay. I'll rewrite this. > > > + #[inline] > > + pub unsafe fn from_raw<'a>(ptr: *const bindings::io_uring_sqe) -> &'a IoUringSqe { > > Private or pub(crate) at best. Okay. pub(crate) > > > + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the > > + // duration of 'a. The cast is okay because `IoUringSqe` is `repr(transparent)` and has the > > + // same memory layout as `bindings::io_uring_sqe`. > > + unsafe { &*ptr.cast() } > > + } > > +} > > diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs > > index ed53169e795c..d38cf7137401 100644 > > --- a/rust/kernel/lib.rs > > +++ b/rust/kernel/lib.rs > > @@ -91,6 +91,7 @@ > > pub mod fs; > > pub mod init; > > pub mod io; > > +pub mod io_uring; > > pub mod ioctl; > > pub mod jump_label; > > #[cfg(CONFIG_KUNIT)] > > -- > > 2.43.0 > > > Thanks for detailed review! I'll revise the comments overall. Also there would be new type for opcode. Thanks, Sidong
[…] >> >>> + #[inline] >>> + pub unsafe fn from_raw<'a>(ptr: *const bindings::io_uring_sqe) -> &'a IoUringSqe { >> >> Private or pub(crate) at best. > > Okay. pub(crate) Try to make things private and reach for pub(crate) here only when it’s actually needed. — Daniel
On Wed Aug 27, 2025 at 10:41 PM CEST, Daniel Almeida wrote: >> On 22 Aug 2025, at 09:55, Sidong Yang <sidong.yang@furiosa.ai> wrote: >> + } >> + >> + let mut out: MaybeUninit<T> = MaybeUninit::uninit(); >> + let ptr = &raw mut inner.pdu as *const c_void; >> + >> + // SAFETY: >> + // * The `ptr` is valid pointer from `self.inner` that is guaranteed by type invariant. >> + // * The `out` is valid pointer that points `T` which impls `FromBytes` and checked >> + // size of `T` is smaller than pdu size. >> + unsafe { >> + core::ptr::copy_nonoverlapping(ptr, out.as_mut_ptr().cast::<c_void>(), len); > > I don’t think you need to manually specify c_void here. In fact using `c_void` as the pointee type is wrong, since it's a ZST and thus this call copies no bytes whatsoever. > Benno, can’t we use core::mem::zeroed() or something like that to avoid this unsafe? > > The input was zeroed in prep() and the output can just be a zeroed T on the > stack, unless I missed something? Hmm I'm not sure I follow, I don't think that `mem::zeroed` will help (it also is an unsafe function). We have a helper in flight that might be useful in this case: https://lore.kernel.org/all/20250826-nova_firmware-v2-1-93566252fe3a@nvidia.com --- Cheers, Benno
© 2016 - 2025 Red Hat, Inc.