From nobody Mon Jun 15 06:30:21 2026 Received: from mail-pg1-f170.google.com (mail-pg1-f170.google.com [209.85.215.170]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 79E2D3264E5 for ; Wed, 15 Apr 2026 09:09:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.215.170 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1776244154; cv=none; b=sf9+xOJryQX1B4qthLxHv8o92nSiwRBQ4jMy626gaQF64TF7LvlrvK1sIAFfytaF1frmjUOim0D9MZzCYB5eONLryfjvQNnTiVGSgZQ4qI6xi3DwdH9C1qxP0/toLseVcenOCObiZ9bmDzaVfcfnaRsBzwclqTYLSey/Ky3mdz0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1776244154; c=relaxed/simple; bh=p2j8Luk7W6yr0bxlOlpHiNO3IOJ/KtgqMq3H9b3ynRQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=WPPRVcCDs7CdPWSxenJk7j8o7IQBOIywBrHZl0gwuAUa0W7hlsvT8lkopcO4o8gDWPtDVjHI5NXr7qmVafIr9wbQjXwCbssg2tGOpZbj7AWjgaDzLikGDxcNKXyjET3dvc2ZZu7TB+oMf2BATaG6Sko1GFO71MDKcYGelrGONN4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=furiosa.ai; spf=none smtp.mailfrom=furiosa.ai; dkim=pass (1024-bit key) header.d=furiosa.ai header.i=@furiosa.ai header.b=M8ghO42Q; arc=none smtp.client-ip=209.85.215.170 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=furiosa.ai Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=furiosa.ai Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=furiosa.ai header.i=@furiosa.ai header.b="M8ghO42Q" Received: by mail-pg1-f170.google.com with SMTP id 41be03b00d2f7-c795f75e971so67109a12.2 for ; Wed, 15 Apr 2026 02:09:13 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=furiosa.ai; s=google; t=1776244153; x=1776848953; darn=vger.kernel.org; 
h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=8CWG3OmkxNDqiNbb7KQHfaHwxQ5jJUOD3gTNZtX0NKo=; b=M8ghO42QTUxohxakaJXzdMZYl8H0I7sKlDVV/8f1utqRS6HUioiyzIyIfgUysqY0zh 1y2WDp8UPeLR3K3r218Hg0qKiOmp+SskueuZwP/Q+rEkaja/IuE6TYLMGXyvAyTZNA74 vs9rAG8eoBJwJj4zc3lUwEekr/YfSZ+NU9W0s= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20251104; t=1776244153; x=1776848953; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-gg:x-gm-message-state:from :to:cc:subject:date:message-id:reply-to; bh=8CWG3OmkxNDqiNbb7KQHfaHwxQ5jJUOD3gTNZtX0NKo=; b=gTTDISSmMWVhzFWUuJ8sT2ywPGjedXFoFQnpNJh1hFVhUkdxx+f3CSDA244gLcOVX2 Pv0ugiWLMl0h1Zk76NJ2NGH2UhRbhDidE23qIwuwCCUJP3o/VDxKaCIhmA0dLV10NDwj CIzC+y5w2Ifi0T/g+xOeVpBKiqU11VAQy3i21xR9GJFsJjiUm8UgHfIVpfM6l371v/fb n0u2WQBedplTjAqS0APjmSTVo3arSa+vOC/lVewxHaMXL8G7q0jMWVjw2qj3qS+sYkaP XVyHBhaY4dyNyTdsCTozdrPfK69li1bHcnXM8GJX6jhZqHVjNgHaoSN1jOfaMUT0hnEx +hwQ== X-Forwarded-Encrypted: i=1; AFNElJ8a5jf9txXkn/eygDz/U4++E1tHv5AUaJ2OWDUFtFuIwSLlw2Gep8eT10/1mpGhERDFbXsdYr+EtQwNsZg=@vger.kernel.org X-Gm-Message-State: AOJu0YxCPVN5oVGSbj2nf9IZsmeMxxvuV91+wF3GDpqWUiDchPdLZAkP aXI+m/4NoNh+fcIJVgBF7TYs6UdKR0WWgE4Br/iWTia/rdqbK6igJfjEbYEGEDXNCfk= X-Gm-Gg: AeBDieu6ZEWjH/FGTnqoh7WFELpzHhoNUcKSqDqHucI0PX/YPIMdgHRb5PKRgGAAPFz yHoU6Q9wnJEu9dSXT6/kebC+vEMdsb1kxjVi/hRwVJOo8ByvQAC+fYNcToBHynQz2OxIKQcg6hR K1L2zo7ChdJZQ9RZAnb3QRlpyegNnOO81LiVxY6q6xFUJzBoPaXkrrPgnb5SEcKhpTh/fEwAee0 lk6MQUFnapayNuTVDq3Sst4bK8ga8QbooYPGr4BTHalrLNfuQ1WPJqgFY329hU00akA4YQjCtFT hRyUKaFTNug4+5yeee0MAsUnnGkJOdVh9c/N9C5dGzFWOOpaMU8ZPmy/17TtTBW7PIoePNWfmU9 9sigOx2Hr5mFqnSCO0+Phe5McJtmzemTskDKbsx+qsp9kYXU9AlyhPJx7wVz9nSMXf/sPnndVUl OIIEt976fOOqVBjWLFV+N48FBmYlDNDrQAiGXJJ5zpL0YHijgTpstKbZ9zp2g= X-Received: by 2002:a05:6a20:a122:b0:39c:787:f17a with SMTP id adf61e73a8af0-39fe3fae289mr22725946637.41.1776244152976; Wed, 15 Apr 2026 
02:09:12 -0700 (PDT) Received: from sidong.sidong.yang.office.furiosa.vpn ([61.83.209.48]) by smtp.gmail.com with ESMTPSA id 41be03b00d2f7-c7957ecee24sm1074619a12.1.2026.04.15.02.09.10 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 15 Apr 2026 02:09:12 -0700 (PDT) From: Sidong Yang To: Jens Axboe , Daniel Almeida , Caleb Sander Mateos , Benno Lossin Cc: Miguel Ojeda , Arnd Bergmann , Greg Kroah-Hartman , rust-for-linux@vger.kernel.org, linux-kernel@vger.kernel.org, io-uring@vger.kernel.org, Sidong Yang Subject: [PATCH v5 1/4] rust: bindings: add io_uring headers in bindings_helper.h Date: Wed, 15 Apr 2026 09:02:12 +0000 Message-ID: <20260415090851.4897-2-sidong.yang@furiosa.ai> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20260415090851.4897-1-sidong.yang@furiosa.ai> References: <20260415090851.4897-1-sidong.yang@furiosa.ai> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add io_uring.h and io_uring/cmd.h to the Rust bindings header, placed in alphabetical order, to provide access to the io_uring command infrastructure from Rust. These are needed by the Rust io_uring abstraction introduced in a subsequent patch. 
Signed-off-by: Sidong Yang --- rust/bindings/bindings_helper.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helpe= r.h index faf3ee634ced..b7b0d549a061 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -60,6 +60,8 @@ #include #include #include +#include +#include #include #include #include --=20 2.43.0 From nobody Mon Jun 15 06:30:21 2026 Received: from mail-pg1-f181.google.com (mail-pg1-f181.google.com [209.85.215.181]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 710093264EA for ; Wed, 15 Apr 2026 09:09:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.215.181 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1776244158; cv=none; b=Z5VqmaKzk6Gzf1LwSHQEIT+M0hxugutUwdJjQglNiDIKjgzVlEojO7RQ1IUDuEYCvXqkwEMiLqwlHNeQUbiyS7Z7XKF/J+OZZDNpYlaUvJkh0zZSnpxPHlXsitLBJOF79VXd4INsKrCv6KQU/FM4S/apoY2RkzsH9kYkzV4lyKs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1776244158; c=relaxed/simple; bh=MqveC8XXEm6pefql1KDgono+sxaB4lcVnRYDUzg5rrI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=DTPzbELS10uo3w+0ba2TdT9BHaBP72NTl9/gJ0vIxmP3K8SW81Z0at6ZFLsQEAgHE0dH9Yq27fSsnKGM5kAOJCzs9OAc8rj6W7Zs6zIcLE6IXdmjwMBVlbVzB10iB8CddWCke9zXC7gQrzW6SBQB6t3Gf06rjYOjRhi3+/ArD9M= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=furiosa.ai; spf=none smtp.mailfrom=furiosa.ai; dkim=pass (1024-bit key) header.d=furiosa.ai header.i=@furiosa.ai header.b=XJ3ufK8d; arc=none smtp.client-ip=209.85.215.181 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=furiosa.ai Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=furiosa.ai Authentication-Results: 
smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=furiosa.ai header.i=@furiosa.ai header.b="XJ3ufK8d" Received: by mail-pg1-f181.google.com with SMTP id 41be03b00d2f7-c76b9efc299so2502391a12.0 for ; Wed, 15 Apr 2026 02:09:16 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=furiosa.ai; s=google; t=1776244156; x=1776848956; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=zlboZSOXebDl0HjeGDN0M1rgRpMVd0coDjlr8qnoRi8=; b=XJ3ufK8daM9KK9GDSlfb7VeSFSn+OV7ar/eatw9XHHKXZcHNMqADkG54UVBuA6Ussb qfT6WFAnA7h8X6H9WFxyx6zr4WIV54fTX2zj8+rubCsHIrr8H7uZCjREecHJBX8UMjQf Y2N7tbRgSxqT7Jb3/9tFJvAwggil3MR1b+hgk= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20251104; t=1776244156; x=1776848956; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-gg:x-gm-message-state:from :to:cc:subject:date:message-id:reply-to; bh=zlboZSOXebDl0HjeGDN0M1rgRpMVd0coDjlr8qnoRi8=; b=jvLNhbTJ8KiS6NqBpN0p1mpbfCYpXnJ0n7z058CCWWyfeqJWDXO2bySfhd9fZvidu4 OeJ27efjifS3N2OmF20IgoqUyxxoMb/48Ohocyy30kZg0xOylOFtqyul59AZR+uVc2Ne bOchyHjJQHIAOycZ5Ev0nYgN5nFjWyjDxpM8M7IbFQefCmQzaJyzmWPQJ7P6uSy89vm7 dP+scrIwbGxPE6jACX4Tx+LMhqIhA3bSOgH0YXbwjMkDfYvIMbc6/dJRlT7RFYJMxrwo MhsRqgAqQQILMzwD3/5aA5+rtc10e87NZA8MA64AHLbmibjPK+wBEe8td3XVLyK0992v Eutg== X-Forwarded-Encrypted: i=1; AFNElJ/LYat/0t226YPYuEJvjtlmpwZKQI2CxEElhLV3We8615i3Vw41kEXhoOexyQbi+fj31TBfg/BJbVvK5N0=@vger.kernel.org X-Gm-Message-State: AOJu0YwwrY89EggC4eKP1Y16YnzNEP8GeLRPqba9BzrAc5crDDepWk3s oWINNBnhvI11LVRLcRetlKM8awL6mEtJjUigNfZMUT1pTiTM3zwtLNWKHl21+B5IH+Y= X-Gm-Gg: AeBDietRvfHQlbBvVfBDWmekIbnjeLjuh8M9yfYihJjAZkx1ugNRD3w71deHcDIvJFD gkOWtNc7zDXhqyg0I1ilRrIc7CwLGJCDX7DyksA4DKd6nOnAAYT42tvLChAvOnvc2y4kRZ3s2FQ P/AEnc8VMlWhZAdrOBTab1kjaW+STI2XH+P3YjxUDNcAaAZPBfimyRuxy0/tdLzN4hJvXFWPIHS 
cXO4rSbRq5wMdRmVi3K35JJ2uclSDxV0GZ6Pb57ai50PCXi5KK5V4Uq+Z8GRHHMnyaOrD00FaSB ItKpPKHMq48O8U9nuT58Pk6XweSjBLdse7J3aTYrt6kJiYdNKRmoeTf8RVRuN+NgSkqa7/ZaAR+ JpSFsic3jKsgnhSnirMtWEg69N7DHIjd2+yXcM0ohr1Ht9wDdFRHK83C3r1tDvTSYqEiTRl0O4x 3y4e89wgsESRAHC45anTPuMeM6TQBqzANPDgvjiAu+3UoV7glaUTIRE7Fr1Ws= X-Received: by 2002:a05:6a20:72a5:b0:398:7ca0:5d32 with SMTP id adf61e73a8af0-39fe3fae157mr21644352637.43.1776244155657; Wed, 15 Apr 2026 02:09:15 -0700 (PDT) Received: from sidong.sidong.yang.office.furiosa.vpn ([61.83.209.48]) by smtp.gmail.com with ESMTPSA id 41be03b00d2f7-c7957ecee24sm1074619a12.1.2026.04.15.02.09.13 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 15 Apr 2026 02:09:15 -0700 (PDT) From: Sidong Yang To: Jens Axboe , Daniel Almeida , Caleb Sander Mateos , Benno Lossin Cc: Miguel Ojeda , Arnd Bergmann , Greg Kroah-Hartman , rust-for-linux@vger.kernel.org, linux-kernel@vger.kernel.org, io-uring@vger.kernel.org, Sidong Yang Subject: [PATCH v5 2/4] rust: io_uring: introduce rust abstraction for io-uring cmd Date: Wed, 15 Apr 2026 09:02:13 +0000 Message-ID: <20260415090851.4897-3-sidong.yang@furiosa.ai> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20260415090851.4897-1-sidong.yang@furiosa.ai> References: <20260415090851.4897-1-sidong.yang@furiosa.ai> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Implement io-uring abstractions for character devices that expose an io_uring command interface via IORING_OP_URING_CMD. The core types are: - IoUringCmd: received by a driver's uring_cmd callback. Provides access to cmd_op, flags, the associated file, and a typed PDU (protocol data unit). Must be either completed synchronously via complete() or queued for async completion via queue(). - QueuedIoUringCmd: obtained from IoUringCmd::queue(). 
The driver calls done() on this handle to post the completion to userspace. - IoUringSqe: the submission queue entry, available through IoUringCmd::sqe(). Exposes the opcode and inline cmd_data. - Opcode: a newtype wrapper around the u8 opcode field, with a URING_CMD constant for driver-specific passthrough commands. - UringCmdAction: a type-state enum (Complete | Queued) returned by the driver callback to indicate the completion path taken. Signed-off-by: Sidong Yang --- rust/helpers/helpers.c | 1 + rust/helpers/io_uring.c | 15 ++ rust/kernel/io_uring.rs | 522 ++++++++++++++++++++++++++++++++++++++++ rust/kernel/lib.rs | 1 + 4 files changed, 539 insertions(+) create mode 100644 rust/helpers/io_uring.c create mode 100644 rust/kernel/io_uring.rs diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index e05c6e7e4abb..3fa2b3d9f83a 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -62,6 +62,7 @@ #include "irq.c" #include "fs.c" #include "io.c" +#include "io_uring.c" #include "jump_label.c" #include "kunit.c" #include "maple_tree.c" diff --git a/rust/helpers/io_uring.c b/rust/helpers/io_uring.c new file mode 100644 index 000000000000..154f67fb3637 --- /dev/null +++ b/rust/helpers/io_uring.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +__rust_helper void rust_helper_io_uring_cmd_done32(struct io_uring_cmd *cm= d, s32 ret, + u64 res2, unsigned int issue_flags) +{ + io_uring_cmd_done32(cmd, ret, res2, issue_flags); +} + +__rust_helper struct io_uring_cmd * +rust_helper_io_uring_cmd_from_tw(struct io_tw_req tw_req) +{ + return io_uring_cmd_from_tw(tw_req); +} diff --git a/rust/kernel/io_uring.rs b/rust/kernel/io_uring.rs new file mode 100644 index 000000000000..606d282e606b --- /dev/null +++ b/rust/kernel/io_uring.rs @@ -0,0 +1,522 @@ +// SPDX-License-Identifier: GPL-2.0 +// SPDX-FileCopyrightText: (C) 2025 Furiosa AI + +//! Abstractions for io-uring. +//! +//! 
This module provides abstractions for the io-uring interface for chara= cter devices. +//! +//! C headers: [`include/linux/io_uring/cmd.h`](srctree/include/linux/io_u= ring/cmd.h) and +//! [`include/linux/io_uring.h`](srctree/include/linux/io_uring.h) + +use core::ptr::NonNull; + +use crate::error::from_result; +use crate::transmute::{AsBytes, FromBytes}; +use crate::{fs::File, types::Opaque}; + +use crate::prelude::*; + +/// Size in bytes of the protocol data unit (PDU) embedded in `io_uring_cm= d`. +/// +/// Matches the size of the `pdu` field in `struct io_uring_cmd` as define= d in +/// `include/linux/io_uring/cmd.h`. +pub(crate) const PDU_SIZE: usize =3D 32; + +/// `issue_flags` value for completions posted from task_work context. +/// +/// Equivalent to the C `IO_URING_CMD_TASK_WORK_ISSUE_FLAGS` macro. +/// Pass this to [`QueuedIoUringCmd::done`] when completing from the +/// task_work callback scheduled by [`QueuedIoUringCmd::complete_in_task`]. +pub const TASK_WORK_ISSUE_FLAGS: u32 =3D + bindings::io_uring_cmd_flags_IO_URING_F_COMPLETE_DEFER as u32; + +/// Opcode of an [`IoUringSqe`]. +/// +/// Each submission queue entry in io_uring specifies an operation +/// to perform, such as read, write, or a driver-specific `URING_CMD`. +#[repr(transparent)] +#[derive(PartialEq)] +pub struct Opcode(u8); + +impl Opcode { + /// Driver-specific passthrough command. + pub const URING_CMD: Self =3D Self(bindings::io_uring_op_IORING_OP_URI= NG_CMD as u8); +} + +/// A fresh `io_uring_cmd` received from the driver callback. +/// +/// Represents a submission received from userspace via `IORING_OP_URING_C= MD`. +/// A driver obtains this from the `uring_cmd` callback in [`crate::miscde= vice::MiscDevice`]. +/// +/// The driver must either complete the command synchronously by calling +/// [`Self::complete`], or queue it for asynchronous completion by calling +/// [`Self::queue`], which yields a [`QueuedIoUringCmd`] handle. 
+/// +/// # Invariants +/// +/// `self.inner` is non-null, properly aligned, and points to a valid, live +/// `bindings::io_uring_cmd` for the duration of the driver callback. +pub struct IoUringCmd { + inner: NonNull, +} + +// SAFETY: `io_uring_cmd` is a kernel-allocated structure. The kernel +// guarantees that it remains alive until the driver either returns a +// non-`EIOCBQUEUED` result or calls `io_uring_cmd_done32()`. Moving the +// pointer to another thread is safe: the kernel object is not tied to any +// particular CPU or task context. +unsafe impl Send for IoUringCmd {} + +// SAFETY: All `&self` methods on `IoUringCmd` only read from the underlyi= ng +// `io_uring_cmd` (cmd_op, flags, sqe, file). `write_pdu` takes `&mut sel= f`, +// so the borrow checker prevents concurrent mutable access. Sharing +// `&IoUringCmd` across threads is therefore safe. +unsafe impl Sync for IoUringCmd {} + +/// An [`IoUringCmd`] that has been queued for asynchronous completion. +/// +/// The only way to obtain a `QueuedIoUringCmd` is through [`IoUringCmd::q= ueue`], +/// which ensures the command was properly handed off to the async path be= fore +/// [`UringCmdAction::Queued`] is returned to the vtable. +/// +/// Call [`Self::done`] exactly once to post the completion to userspace. +/// +/// # Invariants +/// +/// `self.inner` is non-null, properly aligned, and points to a valid, live +/// `bindings::io_uring_cmd` until [`Self::done`] is called. +pub struct QueuedIoUringCmd { + inner: NonNull, +} + +// SAFETY: Same reasoning as for `IoUringCmd`. After `queue()`, the handle= is +// intentionally moved to a different context (e.g. a workqueue) to call +// `done()` later. +unsafe impl Send for QueuedIoUringCmd {} + +// SAFETY: All `&self` methods on `QueuedIoUringCmd` only read from the +// underlying `io_uring_cmd`. +unsafe impl Sync for QueuedIoUringCmd {} + +/// Proof that a `uring_cmd` request completed synchronously. 
+pub struct CompleteAction { + ret: i32, +} + +impl CompleteAction { + /// Returns the userspace result for this synchronous completion. + #[inline] + pub fn ret(&self) -> i32 { + self.ret + } +} + +/// Proof that a `uring_cmd` request was queued for asynchronous completio= n. +/// +/// This type has a private field and can only be constructed inside this = module, +/// so it can only be obtained through [`IoUringCmd::queue`]. +pub struct QueuedAction { + _private: (), +} + +/// Completion mode for `uring_cmd`. +pub enum UringCmdAction { + /// Request is completed synchronously and returns this result to user= space. + Complete(CompleteAction), + /// Request is queued for asynchronous completion. + /// + /// This variant can only be constructed by calling [`IoUringCmd::queu= e`], + /// which enforces that the caller holds a [`QueuedIoUringCmd`] handle= and + /// will eventually call [`QueuedIoUringCmd::done`]. + Queued(QueuedAction), +} + +impl IoUringCmd { + /// Returns the `cmd_op` associated with this command. + #[inline] + pub fn cmd_op(&self) -> u32 { + // SAFETY: `self.inner` is guaranteed by the type invariant to poi= nt + // to a live `io_uring_cmd`, so dereferencing is safe. + unsafe { (*self.as_raw()).cmd_op } + } + + /// Returns the flags field of this command. + /// + /// The returned value is `io_uring_cmd.flags`, which is a combination= of: + /// - User-set flags from `sqe->uring_cmd_flags` (bits 0=E2=80=931): + /// `IORING_URING_CMD_FIXED`, `IORING_URING_CMD_MULTISHOT`. + /// - Kernel-set flags (bits 30=E2=80=9331): + /// `IORING_URING_CMD_CANCELABLE`, `IORING_URING_CMD_REISSUE`. + /// + /// Note: this is **not** the `issue_flags` parameter passed to the + /// `uring_cmd` callback, which carries `IO_URING_F_*` flags such as + /// `IO_URING_F_NONBLOCK`. + #[inline] + pub fn flags(&self) -> u32 { + // SAFETY: `self.inner` is guaranteed by the type invariant to poi= nt + // to a live `io_uring_cmd`, so dereferencing is safe. 
+ unsafe { (*self.as_raw()).flags } + } + + /// Reads the protocol data unit (PDU) as a value of type `T`. + /// + /// # Errors + /// + /// Returns [`EINVAL`] if `size_of::()` exceeds the PDU size. + #[inline] + pub fn read_pdu(&self) -> Result { + // SAFETY: `self.inner` is guaranteed by the type invariant to poi= nt + // to a live `io_uring_cmd`, so dereferencing is safe. + let inner =3D unsafe { &*self.inner.as_ref() }; + + if size_of::() > inner.pdu.len() { + return Err(EINVAL); + } + + let ptr =3D inner.pdu.as_ptr().cast::(); + + // SAFETY: `ptr` is a valid pointer derived from `self.inner`, whi= ch + // is guaranteed by the type invariant. `size_of::()` bytes are + // available in the PDU (checked above). `read_unaligned` is used + // because the PDU is a byte array and may not satisfy `T`'s align= ment. + // `T: FromBytes` guarantees that every bit-pattern is a valid val= ue. + Ok(unsafe { core::ptr::read_unaligned(ptr) }) + } + + /// Writes `value` to the PDU of this command. + /// + /// # Errors + /// + /// Returns [`EINVAL`] if `size_of::()` exceeds the PDU size. + #[inline] + pub fn write_pdu(&mut self, value: &T) -> Result<()> { + // SAFETY: `self.inner` is guaranteed by the type invariant to poi= nt + // to a live `io_uring_cmd`, so dereferencing is safe. + let inner =3D unsafe { self.inner.as_mut() }; + + let len =3D size_of::(); + if len > inner.pdu.len() { + return Err(EINVAL); + } + + let src =3D core::ptr::from_ref(value).cast::(); + let dst =3D (&raw mut inner.pdu).cast::(); + + // SAFETY: + // * `src` points to valid memory because `T: AsBytes`. + // * `dst` is valid and derived from `self.inner`, which is guaran= teed + // by the type invariant. + // * The byte count does not exceed the PDU length (checked above). + unsafe { + core::ptr::copy_nonoverlapping(src, dst, len); + } + + Ok(()) + } + + /// Constructs an [`IoUringCmd`] from a raw pointer. 
+ /// + /// # Safety + /// + /// The caller must guarantee that: + /// - `ptr` is non-null, properly aligned, and points to a valid, init= ialised + /// `bindings::io_uring_cmd`. + /// - The pointed-to object remains alive until the driver either retu= rns a + /// non-`EIOCBQUEUED` value or calls [`QueuedIoUringCmd::done`]. + /// - No other mutable reference to the same object exists for the dur= ation + /// of the returned handle's lifetime. + #[inline] + pub(crate) unsafe fn from_raw(ptr: *mut bindings::io_uring_cmd) -> Res= ult { + let Some(inner) =3D NonNull::new(ptr) else { + return Err(EINVAL); + }; + + Ok(Self { inner }) + } + + /// Returns a raw pointer to the underlying `io_uring_cmd`. + #[inline] + fn as_raw(&self) -> *mut bindings::io_uring_cmd { + self.inner.as_ptr() + } + + /// Returns the file associated with this command. + /// + /// The returned reference is valid for the lifetime of `&self`. The = kernel + /// holds a reference to the file for the entire lifetime of the enclo= sing + /// `io_kiocb`, so this is safe to call at any point while `IoUringCmd= ` is + /// alive. + #[inline] + pub fn file(&self) -> &File { + // SAFETY: `self.inner` is guaranteed by the type invariant to poi= nt + // to a live `io_uring_cmd`, so dereferencing is safe. + let file =3D unsafe { (*self.as_raw()).file }; + + // SAFETY: + // * The `io_kiocb` holds a reference to the file for its entire + // lifetime, so `file` is valid and has a positive refcount. + // * There is no active fdget_pos region on the file on this threa= d. + unsafe { File::from_raw_file(file) } + } + + /// Returns a reference to the [`IoUringSqe`] associated with this com= mand. + #[inline] + pub fn sqe(&self) -> &IoUringSqe { + // SAFETY: `self.inner` is guaranteed by the type invariant to poi= nt + // to a live `io_uring_cmd`, so dereferencing is safe. 
+ let sqe =3D unsafe { self.inner.as_ref().sqe }; + // SAFETY: `sqe` is a valid pointer set by the io_uring core during + // submission queue entry preparation and remains valid for the li= fetime + // of the `io_uring_cmd`. + unsafe { IoUringSqe::from_raw(sqe) } + } + + /// Marks this command as completed synchronously with the provided re= turn value. + /// + /// The vtable will return `ret` directly to the io_uring core, which = posts + /// the completion queue entry. No further action is needed from the = driver. + #[inline] + pub fn complete(self, ret: i32) -> UringCmdAction { + UringCmdAction::Complete(CompleteAction { ret }) + } + + /// Queues this command for asynchronous completion. + /// + /// Returns a [`UringCmdAction::Queued`] token to return from the driv= er + /// callback and a [`QueuedIoUringCmd`] handle that must be used to ca= ll + /// [`QueuedIoUringCmd::done`] at a later point. + /// + /// Because [`QueuedAction`] has a private field, [`UringCmdAction::Qu= eued`] + /// can **only** be constructed through this method. This prevents a = driver + /// from accidentally returning `Queued` after already completing the = command + /// via `done()`. + #[inline] + pub fn queue(self) -> (UringCmdAction, QueuedIoUringCmd) { + let queued =3D QueuedIoUringCmd { inner: self.inner }; + (UringCmdAction::Queued(QueuedAction { _private: () }), queued) + } +} + +impl QueuedIoUringCmd { + /// Returns the `cmd_op` associated with this command. + #[inline] + pub fn cmd_op(&self) -> u32 { + // SAFETY: `self.inner` is guaranteed by the type invariant to poi= nt + // to a live `io_uring_cmd`, so dereferencing is safe. + unsafe { (*self.inner.as_ptr()).cmd_op } + } + + /// Returns the file associated with this command. + /// + /// See [`IoUringCmd::file`] for safety details. + #[inline] + pub fn file(&self) -> &File { + // SAFETY: Same as `IoUringCmd::file`. 
+ let file =3D unsafe { (*self.inner.as_ptr()).file }; + // SAFETY: The `io_kiocb` holds a reference to the file for its en= tire + // lifetime, so `file` is valid and has a positive refcount. + unsafe { File::from_raw_file(file) } + } + + /// Reads the PDU as a value of type `T`. + /// + /// See [`IoUringCmd::read_pdu`] for details and error conditions. + #[inline] + pub fn read_pdu(&self) -> Result { + // SAFETY: `self.inner` is guaranteed by the type invariant to poi= nt + // to a live `io_uring_cmd`, so dereferencing is safe. + let inner =3D unsafe { &*self.inner.as_ref() }; + + if size_of::() > inner.pdu.len() { + return Err(EINVAL); + } + + let ptr =3D inner.pdu.as_ptr().cast::(); + + // SAFETY: Same as `IoUringCmd::read_pdu`. + Ok(unsafe { core::ptr::read_unaligned(ptr) }) + } + + /// Writes `value` to the PDU of this command. + /// + /// See [`IoUringCmd::write_pdu`] for details and error conditions. + #[inline] + pub fn write_pdu(&mut self, value: &T) -> Result<()> { + // SAFETY: `self.inner` is guaranteed by the type invariant to poi= nt + // to a live `io_uring_cmd`, so dereferencing is safe. + let inner =3D unsafe { self.inner.as_mut() }; + + let len =3D size_of::(); + if len > inner.pdu.len() { + return Err(EINVAL); + } + + let src =3D core::ptr::from_ref(value).cast::(); + let dst =3D (&raw mut inner.pdu).cast::(); + + // SAFETY: Same as `IoUringCmd::write_pdu`. + unsafe { + core::ptr::copy_nonoverlapping(src, dst, len); + } + + Ok(()) + } + + /// Posts the completion to userspace. + /// + /// This calls `io_uring_cmd_done32()` directly, so the caller must be i= n a + /// context where that is safe. In practice this means the task_work + /// callback scheduled by [`Self::complete_in_task`]. + /// + /// # Parameters + /// + /// - `ret`: Result to return to userspace. + /// - `res2`: Extra result word for `IORING_SETUP_CQE32` big-CQE rings; + /// pass `0` if not needed. + /// - `issue_flags`: Flags describing the current execution context. 
+ /// Use [`TASK_WORK_ISSUE_FLAGS`] from a task_work callback. + #[inline] + pub fn done(self, ret: Result, res2: u64, issue_flags: u32) { + let ret =3D from_result(|| ret); + // SAFETY: `self.inner` is a valid `io_uring_cmd` that was previou= sly + // queued (returned `EIOCBQUEUED` to io_uring). The kernel keeps = the + // `io_kiocb` alive until this call completes. + unsafe { + bindings::io_uring_cmd_done32(self.inner.as_ptr(), ret, res2, = issue_flags); + } + } + + /// Schedules the completion to run in the submitter's task context. + /// + /// When the task_work fires, [`IoUringTaskWork::task_work`] is called + /// with a reconstructed [`QueuedIoUringCmd`]. The implementor must + /// call [`Self::done`] with [`TASK_WORK_ISSUE_FLAGS`] to finish the + /// request. + /// + /// This is safe to call from any context (workqueue, IRQ, softirq, et= c.). + /// The PDU contents are preserved across this call, so the driver can + /// store arbitrary state in the PDU before calling this method and re= ad + /// it back inside the task_work callback. + #[inline] + pub fn complete_in_task(self) { + /// # Safety + /// + /// Called by the io_uring core in the submitter's task context. + unsafe extern "C" fn trampoline( + tw_req: bindings::io_tw_req, + _tw: bindings::io_tw_token_t, + ) { + // SAFETY: `io_uring_cmd_from_tw` returns a valid `io_uring_cm= d` + // pointer. The io_uring core keeps the `io_kiocb` alive until + // the task_work callback returns. + let ptr =3D unsafe { bindings::io_uring_cmd_from_tw(tw_req) }; + let cmd =3D QueuedIoUringCmd { + inner: unsafe { NonNull::new_unchecked(ptr) }, + }; + T::task_work(cmd); + } + + // SAFETY: `self.inner` is a valid `io_uring_cmd`. + // `IOU_F_TWQ_LAZY_WAKE` uses lazy wakeup semantics (same as + // `io_uring_cmd_do_in_task_lazy` in C). 
+ unsafe { + bindings::__io_uring_cmd_do_in_task( + self.inner.as_ptr(), + Some(trampoline::), + bindings::IOU_F_TWQ_LAZY_WAKE, + ); + } + } +} + +/// Trait for handling io_uring command completion in task_work context. +/// +/// Implement this trait and pass the type to +/// [`QueuedIoUringCmd::complete_in_task`] to schedule deferred completion. +/// +/// The implementor must call [`QueuedIoUringCmd::done`] with +/// [`TASK_WORK_ISSUE_FLAGS`] to complete the request. +pub trait IoUringTaskWork { + /// Called in the submitter's task context. + fn task_work(cmd: QueuedIoUringCmd); +} + +/// A Rust abstraction for `io_uring_sqe`. +/// +/// Represents a Submission Queue Entry (SQE) that describes an I/O operat= ion +/// to be executed by the io_uring subsystem. Obtain an instance from +/// [`IoUringCmd::sqe`]. +/// +/// This type should not be constructed directly by drivers. +/// +/// # Invariants +/// +/// `self.inner` always points to a valid, live `bindings::io_uring_sqe`. +/// The `repr(transparent)` attribute guarantees the same memory layout as= the +/// underlying binding. +#[repr(transparent)] +pub struct IoUringSqe { + inner: Opaque, +} + +impl IoUringSqe { + /// Returns the opcode of this SQE. + pub fn opcode(&self) -> Opcode { + // SAFETY: `self.inner` guaranteed by the type invariant to point + // to a live `io_uring_sqe`, so dereferencing is safe. Volatile + // read is used because the SQE may reside in memory shared with + // userspace. + Opcode(unsafe { core::ptr::addr_of!((*self.inner.get()).opcode).re= ad_volatile() }) + } + + /// Reads the inline `cmd` data of this SQE as a value of type `T`. + /// + /// Only the standard `io_uring_sqe` layout is supported + /// (`IORING_SETUP_SQE128` is not handled here). + /// + /// # Errors + /// + /// Returns [`EINVAL`] if `size_of::()` exceeds the inline command = buffer. 
+ pub fn cmd_data(&self) -> Result { + // SAFETY: `self.inner` guaranteed by the type invariant to point + // to a live `io_uring_sqe`, so dereferencing is safe. + let sqe =3D unsafe { &*self.inner.get() }; + + // SAFETY: Accessing the `sqe.cmd` union field is safe because + // `IoUringSqe` can only be obtained from `IoUringCmd::sqe()`, whi= ch + // is only available inside a `uring_cmd` callback where the opcode + // is guaranteed to be `IORING_OP_URING_CMD` by the io_uring core. + let cmd =3D unsafe { sqe.__bindgen_anon_6.cmd.as_ref() }; + let cmd_len =3D size_of_val(&sqe.__bindgen_anon_6.bindgen_union_fi= eld); + + if cmd_len < size_of::() { + return Err(EINVAL); + } + + let cmd_ptr =3D cmd.as_ptr().cast::(); + + // SAFETY: `cmd_ptr` is valid, derived from `self.inner` which is + // guaranteed by the type invariant. `read_unaligned` is used beca= use + // the cmd data may not satisfy `T`'s alignment requirements. + // `T: FromBytes` guarantees that every bit-pattern is a valid val= ue. + Ok(unsafe { core::ptr::read_unaligned(cmd_ptr) }) + } + + /// Constructs an [`IoUringSqe`] reference from a raw pointer. + /// + /// # Safety + /// + /// The caller must guarantee that: + /// - `ptr` is non-null, properly aligned, and points to a valid, init= ialised + /// `bindings::io_uring_sqe`. + /// - The pointed-to object remains valid for the entire lifetime `'a`. + /// - No mutable access to the same object occurs while the returned + /// reference is alive. + #[inline] + pub(crate) unsafe fn from_raw<'a>(ptr: *const bindings::io_uring_sqe) = -> &'a IoUringSqe { + // SAFETY: The caller guarantees that the pointer is not dangling = and + // stays valid for the duration of 'a. The cast is valid because + // `IoUringSqe` is `repr(transparent)` over `bindings::io_uring_sq= e`. 
+ unsafe { &*ptr.cast() } + } +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 0fa9d820fe7c..235d1d03dde2 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -76,6 +76,7 @@ pub mod impl_flags; pub mod init; pub mod io; +pub mod io_uring; pub mod ioctl; pub mod iommu; pub mod iov; --=20 2.43.0 From nobody Mon Jun 15 06:30:21 2026 Received: from mail-pg1-f174.google.com (mail-pg1-f174.google.com [209.85.215.174]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D4604329E5A for ; Wed, 15 Apr 2026 09:09:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.215.174 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1776244160; cv=none; b=g4owfE059WZEVKeTud41dcEIEulpwiRCZjPn4JBw6JBJWUAxiQ3L30G7+Tnc/PDqoC8Z+eemQjB/mzAYib6DuvRhwycsbwHdtaYGTcgRCPpUPCOZBdYJeHBBAsgLeu7uaYe9J+QN/MKXzf0dlPqymWmCUAgHyRDeUC/vc5ATMBE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1776244160; c=relaxed/simple; bh=Bll4HoxZRoadw3eBKwz/4mSEumf8EkZvc3IvUn6eioY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=gSl2D+9Q3eYM/wla4kxUhHDs9m9m8+LAIKmIjNkkNBLreigiZYxqR8xsI9zgJHp6xFdwq/RH9xMl1RatPKJCuflZEErNK4XI+FgS3LAR0QCKnmiayskZM9fkfyKCPquL0SefE6tKEFRg3z/rgsxA0pO/38Xkg6RzwyEQl4MGyqE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=furiosa.ai; spf=none smtp.mailfrom=furiosa.ai; dkim=pass (1024-bit key) header.d=furiosa.ai header.i=@furiosa.ai header.b=LPLKD0tK; arc=none smtp.client-ip=209.85.215.174 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=furiosa.ai Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=furiosa.ai Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=furiosa.ai header.i=@furiosa.ai 
header.b="LPLKD0tK" Received: by mail-pg1-f174.google.com with SMTP id 41be03b00d2f7-c76b6abdb73so4579445a12.1 for ; Wed, 15 Apr 2026 02:09:18 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=furiosa.ai; s=google; t=1776244158; x=1776848958; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=POyxFYQ+V9mFipVgPFkvK5v/sTQB318bLmjeylTNt7s=; b=LPLKD0tKgKfPh8vYiiiYqBgk+kP9aHC9CBsLSaTb9840FRjN04bDdXPAXnPf0MhEkW MENYRxzu5C3YWfBDjHXOFRx1egzSnOT79SC/P4YdizmLiuPC8puMHFKdeC+G+Fuy3Ky7 RxoFL9qO83yVasKwLWxoWPSNIzRPyI+yC/K3Q= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20251104; t=1776244158; x=1776848958; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-gg:x-gm-message-state:from :to:cc:subject:date:message-id:reply-to; bh=POyxFYQ+V9mFipVgPFkvK5v/sTQB318bLmjeylTNt7s=; b=ZH8xivUGvyoR5xCK9IwcUqoIUm0vIawSfeNMcGip04iL0JbF1YxVr9X5dxXJ7Xpot3 4qc9/ZEO5K2dCjhcvwHHvRkyuYXutMWhJn8u+ZKNQKgVnEJuA9lKf/psIysIIs2xXrEF PMJiY26TV34CpZRjop4jQv2MZgNbrwONG+7MSBuAdRm7xqQw6ItyCzphtah3tl7HWURo 7pNHPYP4IBP2uIIQc4NuWkWQCe9583wSN8cX8QUf/3hpzQioHfe3yUv1787UjQHc8zvI iXakkvJA37SHm0VCmtRjH8+dNPIOzxJZlr+rAvHgMHEzm4MqIxBVHVhJJvcjvVvk9TJD 5tgw== X-Forwarded-Encrypted: i=1; AFNElJ9j+ZE8RFO8pwRVQ71uRyo45eJ7NBuyaQNTEJ2QT6iyepNuPGFSDsuLlrbcIqEmLZMhrkbogIg8ZXkEj4Q=@vger.kernel.org X-Gm-Message-State: AOJu0YxSAaHfiTrf6BQNfueM6actvErF/g6BBkmToIi5X0BtNqM7omhd +Z9FpI8ISir/PC5lIYi37C0Mxgd4VXE3AUHpO16Jg78CMavgug+p5tEVvDH9CGEX0FM= X-Gm-Gg: AeBDievnA+vm5g42lHAx7a39sM9kLFgE30XO5CD+JuMIuVTIDwjqnYJkrNraCHvtmvG lKPLPceUR3EEQBq9UFt/qlg5FaZr0x5NkR2u2F4DrcS/7bEtQ95mblhoWEuN7x3hZy84+DIPrxV wiw4kwK4Jr2fsXGswYlcBdZJ5QAXfafSJOJYZUMbkhHP2iQHlp0GZsVVz4HlNgD/1+fuu7nNfhn h/sQAXf98h+FRjlzKAqUU9eo11B5fsSKXgTKSUYpsXp63YiVEGQ8FkV3VHGN8cJzDsne+Fne05F 
hy40y3bC0wrs5OfMK1b4gA6olGLrw2nad+/6ksyYKxrUyzpq/o9i0zrjJ6mPnrole6H8fojrNIz 5k48KzJbl7bd23SdsQ+ju0xjSp5Biz9Tq/lojDTU3CGTvq7ERU8Zjatu8cbzj62XyWJJ/eGl/3u QWlzjxOg8f53X1rg4Be4EUMID84DMT66XLIOzBDPBVgjHUu7MTAYChnhS/8Bw= X-Received: by 2002:a05:6300:210d:b0:39f:c48d:fcd2 with SMTP id adf61e73a8af0-39fe3ddd0bdmr22715132637.27.1776244158271; Wed, 15 Apr 2026 02:09:18 -0700 (PDT) Received: from sidong.sidong.yang.office.furiosa.vpn ([61.83.209.48]) by smtp.gmail.com with ESMTPSA id 41be03b00d2f7-c7957ecee24sm1074619a12.1.2026.04.15.02.09.15 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 15 Apr 2026 02:09:17 -0700 (PDT) From: Sidong Yang To: Jens Axboe , Daniel Almeida , Caleb Sander Mateos , Benno Lossin Cc: Miguel Ojeda , Arnd Bergmann , Greg Kroah-Hartman , rust-for-linux@vger.kernel.org, linux-kernel@vger.kernel.org, io-uring@vger.kernel.org, Sidong Yang Subject: [PATCH v5 3/4] rust: miscdevice: Add `uring_cmd` support Date: Wed, 15 Apr 2026 09:02:14 +0000 Message-ID: <20260415090851.4897-4-sidong.yang@furiosa.ai> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20260415090851.4897-1-sidong.yang@furiosa.ai> References: <20260415090851.4897-1-sidong.yang@furiosa.ai> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a uring_cmd method to the MiscDevice trait and wire it up to file_operations, allowing Rust misc device drivers to handle IORING_OP_URING_CMD submissions from io_uring. The vtable wrapper zero-initialises the PDU for fresh (non-reissued) commands so that drivers always start from a clean state. On reissue the PDU retains its contents from the previous attempt. To enable uring_cmd for a specific misc device, implement the uring_cmd method in its #[vtable] MiscDevice implementation; the #[vtable] macro then sets HAS_URING_CMD to true automatically, so drivers do not set the constant by hand. 
Signed-off-by: Sidong Yang --- rust/kernel/miscdevice.rs | 81 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index c3c2052c9206..549693e6aea0 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -14,6 +14,7 @@ error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR}, ffi::{c_int, c_long, c_uint, c_ulong}, fs::{File, Kiocb}, + io_uring::{self, IoUringCmd, UringCmdAction}, iov::{IovIterDest, IovIterSource}, mm::virt::VmaNew, prelude::*, @@ -190,6 +191,31 @@ fn show_fdinfo( ) { build_error!(VTABLE_DEFAULT_ERROR) } + + /// Handler for `uring_cmd`. + /// + /// Invoked when userspace submits an `IORING_OP_URING_CMD` entry to t= he + /// io-uring submission queue for a file backed by this driver. + /// + /// The driver must either complete the command synchronously by calli= ng + /// [`IoUringCmd::complete`] and returning `Ok(UringCmdAction::Complet= e(_))`, + /// or queue it for asynchronous completion by calling [`IoUringCmd::q= ueue`] + /// and returning `Ok(UringCmdAction::Queued(_))`. In the latter case= the + /// driver must eventually call [`crate::io_uring::QueuedIoUringCmd::d= one`] + /// to post the completion to userspace. + /// + /// `issue_flags` carries `IO_URING_F_*` flags (e.g. `IO_URING_F_NONBL= OCK`) + /// describing the current execution context. When completing + /// asynchronously, do **not** forward this value to + /// [`crate::io_uring::QueuedIoUringCmd::done`]; see its documentation= for + /// the correct flags to use in each completion context. + fn uring_cmd( + _device: ::Borrowed<'_>, + _io_uring_cmd: IoUringCmd, + _issue_flags: u32, + ) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } } =20 /// A vtable for the file operations of a Rust miscdevice. 
@@ -387,6 +413,56 @@ impl MiscdeviceVTable { T::show_fdinfo(device, m, file); } =20 + /// # Safety + /// + /// - The pointer `ioucmd` is not null and points to a valid `bindings= ::io_uring_cmd`. + unsafe extern "C" fn uring_cmd( + ioucmd: *mut bindings::io_uring_cmd, + issue_flags: ffi::c_uint, + ) -> c_int { + // SAFETY: `file` referenced by `ioucmd` is valid pointer. It's as= signed in + // uring cmd preparation. So dereferencing is safe. + let raw_file =3D unsafe { (*ioucmd).file }; + + // SAFETY: `private_data` is guaranteed that it has valid pointer = after + // this file opened. So dereferencing is safe. + let private =3D unsafe { (*raw_file).private_data }.cast(); + + // SAFETY: `ioucmd` is not null and points to valid memory `bindin= gs::io_uring_cmd` + // and the memory pointed by `ioucmd` is valid and will not be mov= ed or + // freed for the lifetime of returned value `ioucmd` + let ioucmd =3D unsafe { IoUringCmd::from_raw(ioucmd) }; + let mut ioucmd =3D match ioucmd { + Ok(ioucmd) =3D> ioucmd, + Err(e) =3D> { + return e.to_errno(); + } + }; + + // Zero-initialize the PDU for fresh (non-reissued) commands so th= at + // drivers reading from it always start from a clean state. On re= issue + // the PDU retains its contents from the previous attempt, which i= s the + // expected behaviour (e.g. a driver may store state there across + // -EAGAIN retries). + if (ioucmd.flags() & bindings::IORING_URING_CMD_REISSUE) =3D=3D 0 { + if let Err(e) =3D ioucmd.write_pdu(&[0u8; io_uring::PDU_SIZE])= { + return e.to_errno(); + } + } + + // SAFETY: This call is safe because `private` is returned by + // `into_foreign` in [`open`]. 
And it's guaranteed + // that `from_foreign` is called by [`release`] after the end of + // the lifetime of `device` + let device =3D unsafe { ::borrow(private= ) }; + + match T::uring_cmd(device, ioucmd, issue_flags) { + Ok(UringCmdAction::Complete(action)) =3D> action.ret(), + Ok(UringCmdAction::Queued(_)) =3D> EIOCBQUEUED.to_errno(), + Err(e) =3D> e.to_errno(), + } + } + const VTABLE: bindings::file_operations =3D bindings::file_operations { open: Some(Self::open), release: Some(Self::release), @@ -419,6 +495,11 @@ impl MiscdeviceVTable { } else { None }, + uring_cmd: if T::HAS_URING_CMD { + Some(Self::uring_cmd) + } else { + None + }, ..pin_init::zeroed() }; =20 --=20 2.43.0 From nobody Mon Jun 15 06:30:21 2026 Received: from mail-pg1-f170.google.com (mail-pg1-f170.google.com [209.85.215.170]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2AD0A3264E3 for ; Wed, 15 Apr 2026 09:09:22 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.215.170 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1776244163; cv=none; b=N6IkEGyrzRc7qll0U946t2lT6KOXp90gXSTE20QiWC7B3VWzvJpHKXXwz9+68nOYCMxzKv2D+zr4ySlsAIUukEWW0iOBIJ7xun/gylq7VORTcBPvmSMkklxsDh6pUfg/8ZCYiBYBIOk1aXrD8Suut3d1/0THQsL1jzpcsNsEnkM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1776244163; c=relaxed/simple; bh=ubBzsvdvi4CPPSq8r6r63vK+XNzjHX2CU/Xs07mhl4w=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=jujh9hyN6ccrQftzWRug5bSg2F2Oq7WgT6/UAkQRy0JUOVrx6/qVpwwKiCUdRamL/F86OG4SMCswPRQbFdmSCk9LCOQ2ZhEzJ9iDL5CbASlBtXTncWXSveXHA9UT1ojMOf/EMJTKq2BkTazXcjS0vImCN3TljcbvGb3jxm5NxZc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=furiosa.ai; spf=none smtp.mailfrom=furiosa.ai; dkim=pass (1024-bit key) header.d=furiosa.ai 
header.i=@furiosa.ai header.b=A0keWuRI; arc=none smtp.client-ip=209.85.215.170 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=furiosa.ai Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=furiosa.ai Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=furiosa.ai header.i=@furiosa.ai header.b="A0keWuRI" Received: by mail-pg1-f170.google.com with SMTP id 41be03b00d2f7-c6e2355739dso2935120a12.2 for ; Wed, 15 Apr 2026 02:09:22 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=furiosa.ai; s=google; t=1776244162; x=1776848962; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=0sT90Da8dxnzlMMkK3ArmGE7gcm3zpo3Cwr+6iFpQpk=; b=A0keWuRIsJxgGzsyA/DFNLpYSJoNQRyAHQXS9HgKMKIkySBA/TWm1AFBifCpuNHm+X BobZAiur5c8Pr9vlIXqcNEttlb5s5VMgTWn9bNtX1oKT0gcMcVPTcaVD9c8AOyWnWK5u dmPhUqNmBYz6cojD7uLLKndPyGn/TGFEYWWDM= X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20251104; t=1776244162; x=1776848962; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-gg:x-gm-message-state:from :to:cc:subject:date:message-id:reply-to; bh=0sT90Da8dxnzlMMkK3ArmGE7gcm3zpo3Cwr+6iFpQpk=; b=D4m6mgPY0Su0WBpQuXpKe+0NMnIglALN/Qw6ijbZvp764WpQTRPacp7hfipW3ZYqOE ZoI99JnjpkD1LoSIWn/Qamz7V9wZTmFeRQYLrhRLxRultXNnVmKB3VOy6dxBoU3i6yWR gRQ6N3A2zWYkGh5NyRbOiQQ3FNBhHlpdV9QeNMHaikX4x7E244OzVGK+akTIwLOhxHXh 3IL2qvenk9k4FeP/Bj3Vt7GjtYMDa15+iaTkFEvz1LYauCjxGcBXR2XoAJGmCSMp4qCs 6FNfJQNjAEBrlpkDNGbtF9Kqd2Vvw6XreVgb/MqFa594h3DPDR+w68LGXkHBmWv/dLHU +gjA== X-Forwarded-Encrypted: i=1; AFNElJ/SpK6GlydFflJYlkdcSyZjg0WexHY5DRGlz8SJFoBrCCueV564QQMWLg3iHVN8/KFKFMWbO9HAV8oI7oE=@vger.kernel.org X-Gm-Message-State: AOJu0Yyc8anwZfdrDtwY3ZiZ4hsv5IP4WjhMp1phI+R3OxJMrteyxU/4 
10p50DOemFVORltf4CtEdHTk6Lx2pHuSpJOq3DcU30yBTQhqLqHJt2mFjbh/BTzUkB4= X-Gm-Gg: AeBDietkHe8tlcd3dKRZ2eKADz99Wu/VAMvkLeJhm2UjpG+FRHZYA/iQRr/80E4Ks/+ CozEDvZs3JxeHqZikEkmgW8YEobYggzc/SeS6tvr+beW3XTnxp9QBzn+3vD66ZEuMn7Hmmjn9Be 9UDh94NyFBnQ0Cx95zwoMcdLw6JdqSLKjkzLXwnSsQ1Tdi695ZOv4oKGiPtwfiA5DGnUQFim3aW 9+u7V5fnWSTWTmgxWtW3zATnWSWPrbeGPk7yngrnOr+tLUVI54ZixFNstu7b5F0T3UPVYHS0NpW v2Pg+Ia+SKvMDgbcWCw4cLnuc/II4jgDmCOwc9f7S1rRFIkX5UkIJ84YsqBbNA557LZLCrjeyZe LrPiU8XrWfNO1fsCjVzeciYTvTm68WFzr6QcG4Q9omNupVWP+1XhWd7668vIXedecnUGjATZr81 Wh7vnk8zhpCJk4XplrvMh559KvJYwaViDZrKuBUz8Z2WUglG1e26bEe4qLEuY= X-Received: by 2002:a05:6a20:6a2c:b0:39c:4b84:d90f with SMTP id adf61e73a8af0-39fe3c6796emr22382238637.8.1776244161341; Wed, 15 Apr 2026 02:09:21 -0700 (PDT) Received: from sidong.sidong.yang.office.furiosa.vpn ([61.83.209.48]) by smtp.gmail.com with ESMTPSA id 41be03b00d2f7-c7957ecee24sm1074619a12.1.2026.04.15.02.09.18 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 15 Apr 2026 02:09:21 -0700 (PDT) From: Sidong Yang To: Jens Axboe , Daniel Almeida , Caleb Sander Mateos , Benno Lossin Cc: Miguel Ojeda , Arnd Bergmann , Greg Kroah-Hartman , rust-for-linux@vger.kernel.org, linux-kernel@vger.kernel.org, io-uring@vger.kernel.org, Sidong Yang Subject: [PATCH v5 4/4] samples: rust: Add `uring_cmd` example to `rust_misc_device` Date: Wed, 15 Apr 2026 09:02:15 +0000 Message-ID: <20260415090851.4897-5-sidong.yang@furiosa.ai> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20260415090851.4897-1-sidong.yang@furiosa.ai> References: <20260415090851.4897-1-sidong.yang@furiosa.ai> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Extend the rust_misc_device sample to demonstrate uring_cmd handling. 
The example completes asynchronously using a workqueue combined with complete_in_task(), showing the full async completion flow: IoUringCmd -> queue() -> workqueue -> complete_in_task() -> task_work -> done(). Signed-off-by: Sidong Yang --- samples/rust/rust_misc_device.rs | 62 +++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_devi= ce.rs index 87a1fe63533a..4059348a56ad 100644 --- a/samples/rust/rust_misc_device.rs +++ b/samples/rust/rust_misc_device.rs @@ -98,13 +98,15 @@ use kernel::{ device::Device, fs::{File, Kiocb}, + io_uring::{self, IoUringCmd, IoUringTaskWork, QueuedIoUringCmd, UringC= mdAction}, ioctl::{_IO, _IOC_SIZE, _IOR, _IOW}, iov::{IovIterDest, IovIterSource}, miscdevice::{MiscDevice, MiscDeviceOptions, MiscDeviceRegistration}, new_mutex, prelude::*, - sync::{aref::ARef, Mutex}, + sync::{aref::ARef, Arc, Mutex}, uaccess::{UserSlice, UserSliceReader, UserSliceWriter}, + workqueue::{impl_has_work, new_work, HasWork}, }; =20 const RUST_MISC_DEV_HELLO: u32 =3D _IO('|' as u32, 0x80); @@ -151,6 +153,51 @@ struct RustMiscDevice { dev: ARef<Device>, } =20 +#[pin_data] +struct IoUringCmdWork { + #[pin] + ioucmd: Mutex<Option<QueuedIoUringCmd>>, + #[pin] + work: kernel::workqueue::Work<IoUringCmdWork>, +} + +impl_has_work! { + impl HasWork<Self> for IoUringCmdWork { self.work } +} + +/// Task-work completion handler for the sample device. 
+struct RustMiscDeviceCompletion; + +impl IoUringTaskWork for RustMiscDeviceCompletion { + fn task_work(cmd: QueuedIoUringCmd) { + cmd.done(Ok(0), 0, io_uring::TASK_WORK_ISSUE_FLAGS); + } +} + +impl kernel::workqueue::WorkItem for IoUringCmdWork { + type Pointer =3D Arc<IoUringCmdWork>; + + fn run(work: Arc<IoUringCmdWork>) { + pr_info!("IoUringCmdWork::run()\n"); + + if let Some(ioucmd) =3D work.ioucmd.lock().take() { + ioucmd.complete_in_task::<RustMiscDeviceCompletion>(); + } + } +} + +impl IoUringCmdWork { + fn new(ioucmd: QueuedIoUringCmd) -> Result<Arc<Self>> { + Arc::pin_init( + pin_init!(Self { + ioucmd <- new_mutex!(Some(ioucmd)), + work <- new_work!("IoUringCmdWork::work"), + }), + GFP_KERNEL, + ) + } +} + #[vtable] impl MiscDevice for RustMiscDevice { type Ptr =3D Pin<KBox<Self>>; @@ -220,6 +267,19 @@ fn ioctl(me: Pin<&RustMiscDevice>, _file: &File, cmd: = u32, arg: usize) -> Result =20 Ok(0) } + + fn uring_cmd( + me: Pin<&RustMiscDevice>, + ioucmd: IoUringCmd, + _issue_flags: u32, + ) -> Result<UringCmdAction> { + dev_info!(me.dev, "UringCmd Rust Misc Device Sample\n"); + + let (action, queued_ioucmd) =3D ioucmd.queue(); + let work =3D IoUringCmdWork::new(queued_ioucmd)?; + let _ =3D kernel::workqueue::system().enqueue(work); + Ok(action) + } } =20 #[pinned_drop] --=20 2.43.0