Provide a `MiscDevice` trait that lets you specify the file operations
that you wish to provide for your misc device. For now, only three file
operations are provided: open, close, ioctl.
These abstractions only support MISC_DYNAMIC_MINOR. This enforces that
new miscdevices should not hard-code a minor number.
When implementing ioctl, the Result type is used. This means that you
can choose to return either of:
* An integer of type isize.
* An errno using the kernel::error::Error type.
When returning an isize, the integer is returned verbatim. It's mainly
intended for returning positive integers to userspace. However, it is
technically possible to return errors via the isize return value too.
To avoid having a dependency on files, this patch does not provide the
file operations callbacks a pointer to the file. This means that they
cannot check file properties such as O_NONBLOCK (which Binder needs).
Support for that can be added as a follow-up.
To avoid having a dependency on vma, this patch does not provide any way
to implement mmap (which Binder needs). Support for that can be added as
a follow-up.
Rust Binder will use these abstractions to create the /dev/binder file
when binderfs is disabled.
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
---
rust/bindings/bindings_helper.h | 1 +
rust/kernel/lib.rs | 1 +
rust/kernel/miscdevice.rs | 241 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 243 insertions(+)
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index ae82e9c941af..84303bf221dd 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -15,6 +15,7 @@
#include <linux/firmware.h>
#include <linux/jiffies.h>
#include <linux/mdio.h>
+#include <linux/miscdevice.h>
#include <linux/phy.h>
#include <linux/refcount.h>
#include <linux/sched.h>
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 22a3bfa5a9e9..e268eae54c81 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -39,6 +39,7 @@
#[cfg(CONFIG_KUNIT)]
pub mod kunit;
pub mod list;
+pub mod miscdevice;
#[cfg(CONFIG_NET)]
pub mod net;
pub mod page;
diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
new file mode 100644
index 000000000000..cbd5249b5b45
--- /dev/null
+++ b/rust/kernel/miscdevice.rs
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Miscdevice support.
+//!
+//! C headers: [`include/linux/miscdevice.h`](srctree/include/linux/miscdevice.h).
+//!
+//! Reference: <https://www.kernel.org/doc/html/latest/driver-api/misc_devices.html>
+
+use crate::{
+ bindings,
+ error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR},
+ prelude::*,
+ str::CStr,
+ types::{ForeignOwnable, Opaque},
+};
+use core::{
+ ffi::{c_int, c_long, c_uint, c_ulong},
+ marker::PhantomData,
+ mem::MaybeUninit,
+ pin::Pin,
+};
+
+/// Options for creating a misc device.
+#[derive(Copy, Clone)]
+pub struct MiscDeviceOptions {
+ /// The name of the miscdevice.
+ pub name: &'static CStr,
+}
+
+impl MiscDeviceOptions {
+ /// Create a raw `struct miscdev` ready for registration.
+ pub const fn into_raw<T: MiscDevice>(self) -> bindings::miscdevice {
+ // SAFETY: All zeros is valid for this C type.
+ let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() };
+ result.minor = bindings::MISC_DYNAMIC_MINOR as _;
+ result.name = self.name.as_char_ptr();
+ result.fops = create_vtable::<T>();
+ result
+ }
+}
+
+/// A registration of a miscdevice.
+///
+/// # Invariants
+///
+/// `inner` is a registered misc device.
+#[repr(transparent)]
+#[pin_data(PinnedDrop)]
+pub struct MiscDeviceRegistration<T> {
+ #[pin]
+ inner: Opaque<bindings::miscdevice>,
+ _t: PhantomData<T>,
+}
+
+// SAFETY: It is allowed to call `misc_deregister` on a different thread from where you called
+// `misc_register`.
+unsafe impl<T> Send for MiscDeviceRegistration<T> {}
+// SAFETY: All `&self` methods on this type are written to ensure that it is safe to call them in
+// parallel.
+unsafe impl<T> Sync for MiscDeviceRegistration<T> {}
+
+impl<T: MiscDevice> MiscDeviceRegistration<T> {
+ /// Register a misc device.
+ pub fn register(opts: MiscDeviceOptions) -> impl PinInit<Self, Error> {
+ try_pin_init!(Self {
+ inner <- Opaque::try_ffi_init(move |slot: *mut bindings::miscdevice| {
+ // SAFETY: The initializer can write to the provided `slot`.
+ unsafe { slot.write(opts.into_raw::<T>()) };
+
+ // SAFETY: We just wrote the misc device options to the slot. The miscdevice will
+ // get unregistered before `slot` is deallocated because the memory is pinned and
+ // the destructor of this type deallocates the memory.
+ // INVARIANT: If this returns `Ok(())`, then the `slot` will contain a registered
+ // misc device.
+ to_result(unsafe { bindings::misc_register(slot) })
+ }),
+ _t: PhantomData,
+ })
+ }
+
+ /// Returns a raw pointer to the misc device.
+ pub fn as_raw(&self) -> *mut bindings::miscdevice {
+ self.inner.get()
+ }
+}
+
+#[pinned_drop]
+impl<T> PinnedDrop for MiscDeviceRegistration<T> {
+ fn drop(self: Pin<&mut Self>) {
+ // SAFETY: We know that the device is registered by the type invariants.
+ unsafe { bindings::misc_deregister(self.inner.get()) };
+ }
+}
+
+/// Trait implemented by the private data of an open misc device.
+#[vtable]
+pub trait MiscDevice {
+ /// What kind of pointer should `Self` be wrapped in.
+ type Ptr: ForeignOwnable + Send + Sync;
+
+ /// Called when the misc device is opened.
+ ///
+ /// The returned pointer will be stored as the private data for the file.
+ fn open() -> Result<Self::Ptr>;
+
+ /// Called when the misc device is released.
+ fn release(device: Self::Ptr) {
+ drop(device);
+ }
+
+ /// Handler for ioctls.
+ ///
+ /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`].
+ ///
+ /// [`kernel::ioctl`]: mod@crate::ioctl
+ fn ioctl(
+ _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
+ _cmd: u32,
+ _arg: usize,
+ ) -> Result<isize> {
+ kernel::build_error(VTABLE_DEFAULT_ERROR)
+ }
+
+ /// Handler for ioctls.
+ ///
+ /// Used for 32-bit userspace on 64-bit platforms.
+ ///
+ /// This method is optional and only needs to be provided if the ioctl relies on structures
+ /// that have different layout on 32-bit and 64-bit userspace. If no implementation is
+ /// provided, then `compat_ptr_ioctl` will be used instead.
+ #[cfg(CONFIG_COMPAT)]
+ fn compat_ioctl(
+ _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
+ _cmd: u32,
+ _arg: usize,
+ ) -> Result<isize> {
+ kernel::build_error(VTABLE_DEFAULT_ERROR)
+ }
+}
+
+const fn create_vtable<T: MiscDevice>() -> &'static bindings::file_operations {
+ const fn maybe_fn<T: Copy>(check: bool, func: T) -> Option<T> {
+ if check {
+ Some(func)
+ } else {
+ None
+ }
+ }
+
+ struct VtableHelper<T: MiscDevice> {
+ _t: PhantomData<T>,
+ }
+ impl<T: MiscDevice> VtableHelper<T> {
+ const VTABLE: bindings::file_operations = bindings::file_operations {
+ open: Some(fops_open::<T>),
+ release: Some(fops_release::<T>),
+ unlocked_ioctl: maybe_fn(T::HAS_IOCTL, fops_ioctl::<T>),
+ #[cfg(CONFIG_COMPAT)]
+ compat_ioctl: if T::HAS_COMPAT_IOCTL {
+ Some(fops_compat_ioctl::<T>)
+ } else if T::HAS_IOCTL {
+ Some(bindings::compat_ptr_ioctl)
+ } else {
+ None
+ },
+ ..unsafe { MaybeUninit::zeroed().assume_init() }
+ };
+ }
+
+ &VtableHelper::<T>::VTABLE
+}
+
+unsafe extern "C" fn fops_open<T: MiscDevice>(
+ inode: *mut bindings::inode,
+ file: *mut bindings::file,
+) -> c_int {
+ // SAFETY: The pointers are valid and for a file being opened.
+ let ret = unsafe { bindings::generic_file_open(inode, file) };
+ if ret != 0 {
+ return ret;
+ }
+
+ let ptr = match T::open() {
+ Ok(ptr) => ptr,
+ Err(err) => return err.to_errno(),
+ };
+
+ // SAFETY: The open call of a file owns the private data.
+ unsafe { (*file).private_data = ptr.into_foreign().cast_mut() };
+
+ 0
+}
+
+unsafe extern "C" fn fops_release<T: MiscDevice>(
+ _inode: *mut bindings::inode,
+ file: *mut bindings::file,
+) -> c_int {
+ // SAFETY: The release call of a file owns the private data.
+ let private = unsafe { (*file).private_data };
+ // SAFETY: The release call of a file owns the private data.
+ let ptr = unsafe { <T::Ptr as ForeignOwnable>::from_foreign(private) };
+
+ T::release(ptr);
+
+ 0
+}
+
+unsafe extern "C" fn fops_ioctl<T: MiscDevice>(
+ file: *mut bindings::file,
+ cmd: c_uint,
+ arg: c_ulong,
+) -> c_long {
+ // SAFETY: The ioctl call of a file can access the private data.
+ let private = unsafe { (*file).private_data };
+ // SAFETY: Ioctl calls can borrow the private data of the file.
+ let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
+
+ match T::ioctl(device, cmd as u32, arg as usize) {
+ Ok(ret) => ret as c_long,
+ Err(err) => err.to_errno() as c_long,
+ }
+}
+
+#[cfg(CONFIG_COMPAT)]
+unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>(
+ file: *mut bindings::file,
+ cmd: c_uint,
+ arg: c_ulong,
+) -> c_long {
+ // SAFETY: The compat ioctl call of a file can access the private data.
+ let private = unsafe { (*file).private_data };
+ // SAFETY: Ioctl calls can borrow the private data of the file.
+ let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
+
+ match T::compat_ioctl(device, cmd as u32, arg as usize) {
+ Ok(ret) => ret as c_long,
+ Err(err) => err.to_errno() as c_long,
+ }
+}
--
2.46.1.824.gd892dcdcdd-goog
Hi Alice, Greg, On Tue, Oct 1, 2024 at 10:23 AM Alice Ryhl <aliceryhl@google.com> wrote: > > + compat_ioctl: if T::HAS_COMPAT_IOCTL { > + Some(fops_compat_ioctl::<T>) > + } else if T::HAS_IOCTL { > + Some(bindings::compat_ptr_ioctl) > + } else { > + None > + }, > + ..unsafe { MaybeUninit::zeroed().assume_init() } With the lints series queued for the next cycle, Clippy spots the missing `// SAFETY` comment here... > +unsafe extern "C" fn fops_open<T: MiscDevice>( > + inode: *mut bindings::inode, > + file: *mut bindings::file, > +) -> c_int { ...as well as the missing `# Safety` section for each of these. It can be seen in e.g. today's -next. I hope that helps! Cheers, Miguel
On Mon, Oct 21, 2024 at 12:34 PM Miguel Ojeda <miguel.ojeda.sandonis@gmail.com> wrote: > > Hi Alice, Greg, > > On Tue, Oct 1, 2024 at 10:23 AM Alice Ryhl <aliceryhl@google.com> wrote: > > > > + compat_ioctl: if T::HAS_COMPAT_IOCTL { > > + Some(fops_compat_ioctl::<T>) > > + } else if T::HAS_IOCTL { > > + Some(bindings::compat_ptr_ioctl) > > + } else { > > + None > > + }, > > + ..unsafe { MaybeUninit::zeroed().assume_init() } > > With the lints series queued for the next cycle, Clippy spots the > missing `// SAFETY` comment here... > > > +unsafe extern "C" fn fops_open<T: MiscDevice>( > > + inode: *mut bindings::inode, > > + file: *mut bindings::file, > > +) -> c_int { > > ...as well as the missing `# Safety` section for each of these. > > It can be seen in e.g. today's -next. > > I hope that helps! I sent https://lore.kernel.org/all/20241022-miscdevice-unsafe-warn-fix-v1-1-a78fde1740d6@google.com/ Thanks! Alice
On Tue, Oct 01, 2024 at 08:22:22AM GMT, Alice Ryhl wrote: > Provide a `MiscDevice` trait that lets you specify the file operations > that you wish to provide for your misc device. For now, only three file > operations are provided: open, close, ioctl. > > These abstractions only support MISC_DYNAMIC_MINOR. This enforces that > new miscdevices should not hard-code a minor number. > > When implementing ioctl, the Result type is used. This means that you > can choose to return either of: > * An integer of type isize. > * An errno using the kernel::error::Error type. > When returning an isize, the integer is returned verbatim. It's mainly > intended for returning positive integers to userspace. However, it is > technically possible to return errors via the isize return value too. > > To avoid having a dependency on files, this patch does not provide the > file operations callbacks a pointer to the file. This means that they > cannot check file properties such as O_NONBLOCK (which Binder needs). > Support for that can be added as a follow-up. > > To avoid having a dependency on vma, this patch does not provide any way > to implement mmap (which Binder needs). Support for that can be added as > a follow-up. > > Rust Binder will use these abstractions to create the /dev/binder file > when binderfs is disabled. > > Signed-off-by: Alice Ryhl <aliceryhl@google.com> > --- > rust/bindings/bindings_helper.h | 1 + > rust/kernel/lib.rs | 1 + > rust/kernel/miscdevice.rs | 241 ++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 243 insertions(+) > > diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h > index ae82e9c941af..84303bf221dd 100644 > --- a/rust/bindings/bindings_helper.h > +++ b/rust/bindings/bindings_helper.h > @@ -15,6 +15,7 @@ > #include <linux/firmware.h> > #include <linux/jiffies.h> > #include <linux/mdio.h> > +#include <linux/miscdevice.h> > #include <linux/phy.h> > #include <linux/refcount.h> > #include <linux/sched.h> > diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs > index 22a3bfa5a9e9..e268eae54c81 100644 > --- a/rust/kernel/lib.rs > +++ b/rust/kernel/lib.rs > @@ -39,6 +39,7 @@ > #[cfg(CONFIG_KUNIT)] > pub mod kunit; > pub mod list; > +pub mod miscdevice; > #[cfg(CONFIG_NET)] > pub mod net; > pub mod page; > diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs > new file mode 100644 > index 000000000000..cbd5249b5b45 > --- /dev/null > +++ b/rust/kernel/miscdevice.rs > @@ -0,0 +1,241 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +// Copyright (C) 2024 Google LLC. > + > +//! Miscdevice support. > +//! > +//! C headers: [`include/linux/miscdevice.h`](srctree/include/linux/miscdevice.h). > +//! > +//! Reference: <https://www.kernel.org/doc/html/latest/driver-api/misc_devices.html> > + > +use crate::{ > + bindings, > + error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR}, > + prelude::*, > + str::CStr, > + types::{ForeignOwnable, Opaque}, > +}; > +use core::{ > + ffi::{c_int, c_long, c_uint, c_ulong}, > + marker::PhantomData, > + mem::MaybeUninit, > + pin::Pin, > +}; > + > +/// Options for creating a misc device. > +#[derive(Copy, Clone)] > +pub struct MiscDeviceOptions { > + /// The name of the miscdevice. > + pub name: &'static CStr, > +} > + > +impl MiscDeviceOptions { > + /// Create a raw `struct miscdev` ready for registration. > + pub const fn into_raw<T: MiscDevice>(self) -> bindings::miscdevice { > + // SAFETY: All zeros is valid for this C type. > + let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() }; > + result.minor = bindings::MISC_DYNAMIC_MINOR as _; > + result.name = self.name.as_char_ptr(); > + result.fops = create_vtable::<T>(); > + result > + } > +} > + > +/// A registration of a miscdevice. > +/// > +/// # Invariants > +/// > +/// `inner` is a registered misc device. > +#[repr(transparent)] > +#[pin_data(PinnedDrop)] > +pub struct MiscDeviceRegistration<T> { > + #[pin] > + inner: Opaque<bindings::miscdevice>, > + _t: PhantomData<T>, > +} > + > +// SAFETY: It is allowed to call `misc_deregister` on a different thread from where you called > +// `misc_register`. > +unsafe impl<T> Send for MiscDeviceRegistration<T> {} > +// SAFETY: All `&self` methods on this type are written to ensure that it is safe to call them in > +// parallel. > +unsafe impl<T> Sync for MiscDeviceRegistration<T> {} > + > +impl<T: MiscDevice> MiscDeviceRegistration<T> { > + /// Register a misc device. > + pub fn register(opts: MiscDeviceOptions) -> impl PinInit<Self, Error> { > + try_pin_init!(Self { > + inner <- Opaque::try_ffi_init(move |slot: *mut bindings::miscdevice| { > + // SAFETY: The initializer can write to the provided `slot`. > + unsafe { slot.write(opts.into_raw::<T>()) }; > + > + // SAFETY: We just wrote the misc device options to the slot. The miscdevice will > + // get unregistered before `slot` is deallocated because the memory is pinned and > + // the destructor of this type deallocates the memory. > + // INVARIANT: If this returns `Ok(())`, then the `slot` will contain a registered > + // misc device. > + to_result(unsafe { bindings::misc_register(slot) }) > + }), > + _t: PhantomData, > + }) > + } > + > + /// Returns a raw pointer to the misc device. > + pub fn as_raw(&self) -> *mut bindings::miscdevice { > + self.inner.get() > + } > +} > + > +#[pinned_drop] > +impl<T> PinnedDrop for MiscDeviceRegistration<T> { > + fn drop(self: Pin<&mut Self>) { > + // SAFETY: We know that the device is registered by the type invariants. > + unsafe { bindings::misc_deregister(self.inner.get()) }; > + } > +} > + > +/// Trait implemented by the private data of an open misc device. > +#[vtable] > +pub trait MiscDevice { > + /// What kind of pointer should `Self` be wrapped in. > + type Ptr: ForeignOwnable + Send + Sync; > + > + /// Called when the misc device is opened. > + /// > + /// The returned pointer will be stored as the private data for the file. > + fn open() -> Result<Self::Ptr>; > + > + /// Called when the misc device is released. > + fn release(device: Self::Ptr) { > + drop(device); > + } > + > + /// Handler for ioctls. > + /// > + /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`]. > + /// > + /// [`kernel::ioctl`]: mod@crate::ioctl > + fn ioctl( > + _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>, > + _cmd: u32, > + _arg: usize, > + ) -> Result<isize> { > + kernel::build_error(VTABLE_DEFAULT_ERROR) > + } > + > + /// Handler for ioctls. > + /// > + /// Used for 32-bit userspace on 64-bit platforms. > + /// > + /// This method is optional and only needs to be provided if the ioctl relies on structures > + /// that have different layout on 32-bit and 64-bit userspace. If no implementation is > + /// provided, then `compat_ptr_ioctl` will be used instead. > + #[cfg(CONFIG_COMPAT)] > + fn compat_ioctl( > + _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>, > + _cmd: u32, > + _arg: usize, > + ) -> Result<isize> { > + kernel::build_error(VTABLE_DEFAULT_ERROR) > + } > +} > + > +const fn create_vtable<T: MiscDevice>() -> &'static bindings::file_operations { > + const fn maybe_fn<T: Copy>(check: bool, func: T) -> Option<T> { > + if check { > + Some(func) > + } else { > + None > + } > + } > + > + struct VtableHelper<T: MiscDevice> { > + _t: PhantomData<T>, > + } > + impl<T: MiscDevice> VtableHelper<T> { > + const VTABLE: bindings::file_operations = bindings::file_operations { > + open: Some(fops_open::<T>), > + release: Some(fops_release::<T>), > + unlocked_ioctl: maybe_fn(T::HAS_IOCTL, fops_ioctl::<T>), > + #[cfg(CONFIG_COMPAT)] > + compat_ioctl: if T::HAS_COMPAT_IOCTL { > + Some(fops_compat_ioctl::<T>) > + } else if T::HAS_IOCTL { > + Some(bindings::compat_ptr_ioctl) > + } else { > + None > + }, > + ..unsafe { MaybeUninit::zeroed().assume_init() } > + }; > + } > + > + &VtableHelper::<T>::VTABLE > +} > + > +unsafe extern "C" fn fops_open<T: MiscDevice>( > + inode: *mut bindings::inode, > + file: *mut bindings::file, > +) -> c_int { > + // SAFETY: The pointers are valid and for a file being opened. > + let ret = unsafe { bindings::generic_file_open(inode, file) }; > + if ret != 0 { > + return ret; > + } Do you have code where that function is used? Because this looks wrong or at least I don't understand from just a glance whether that generic_file_open() call makes sense. Illustrating how we get from opening /dev/binder to this call would help. > + > + let ptr = match T::open() { > + Ok(ptr) => ptr, > + Err(err) => return err.to_errno(), > + }; > + > + // SAFETY: The open call of a file owns the private data. > + unsafe { (*file).private_data = ptr.into_foreign().cast_mut() }; > + > + 0 > +} > + > +unsafe extern "C" fn fops_release<T: MiscDevice>( > + _inode: *mut bindings::inode, > + file: *mut bindings::file, > +) -> c_int { > + // SAFETY: The release call of a file owns the private data. > + let private = unsafe { (*file).private_data }; > + // SAFETY: The release call of a file owns the private data. > + let ptr = unsafe { <T::Ptr as ForeignOwnable>::from_foreign(private) }; > + > + T::release(ptr); > + > + 0 > +} > + > +unsafe extern "C" fn fops_ioctl<T: MiscDevice>( > + file: *mut bindings::file, > + cmd: c_uint, > + arg: c_ulong, > +) -> c_long { > + // SAFETY: The ioctl call of a file can access the private data. > + let private = unsafe { (*file).private_data }; > + // SAFETY: Ioctl calls can borrow the private data of the file. > + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) }; > + > + match T::ioctl(device, cmd as u32, arg as usize) { > + Ok(ret) => ret as c_long, > + Err(err) => err.to_errno() as c_long, > + } > +} > + > +#[cfg(CONFIG_COMPAT)] > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>( > + file: *mut bindings::file, > + cmd: c_uint, > + arg: c_ulong, > +) -> c_long { > + // SAFETY: The compat ioctl call of a file can access the private data. > + let private = unsafe { (*file).private_data }; > + // SAFETY: Ioctl calls can borrow the private data of the file. > + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) }; > + > + match T::compat_ioctl(device, cmd as u32, arg as usize) { > + Ok(ret) => ret as c_long, > + Err(err) => err.to_errno() as c_long, > + } > +} > > -- > 2.46.1.824.gd892dcdcdd-goog >
On Wed, Oct 2, 2024 at 4:06 PM Christian Brauner <brauner@kernel.org> wrote: > > On Tue, Oct 01, 2024 at 08:22:22AM GMT, Alice Ryhl wrote: > > +unsafe extern "C" fn fops_open<T: MiscDevice>( > > + inode: *mut bindings::inode, > > + file: *mut bindings::file, > > +) -> c_int { > > + // SAFETY: The pointers are valid and for a file being opened. > > + let ret = unsafe { bindings::generic_file_open(inode, file) }; > > + if ret != 0 { > > + return ret; > > + } > > Do you have code where that function is used? Because this looks wrong > or at least I don't understand from just a glance whether that > generic_file_open() call makes sense. > > Illustrating how we get from opening /dev/binder to this call would > help. Hmm. I wrote this by comparing with the ashmem open callback. Now that you mention it you are right that Binder does not call generic_file_open ... I have to admit that I don't know what generic_file_open does. Alice
On Wed, Oct 02, 2024 at 04:24:58PM GMT, Alice Ryhl wrote: > On Wed, Oct 2, 2024 at 4:06 PM Christian Brauner <brauner@kernel.org> wrote: > > > > On Tue, Oct 01, 2024 at 08:22:22AM GMT, Alice Ryhl wrote: > > > +unsafe extern "C" fn fops_open<T: MiscDevice>( > > > + inode: *mut bindings::inode, > > > + file: *mut bindings::file, > > > +) -> c_int { > > > + // SAFETY: The pointers are valid and for a file being opened. > > > + let ret = unsafe { bindings::generic_file_open(inode, file) }; > > > + if ret != 0 { > > > + return ret; > > > + } > > > > Do you have code where that function is used? Because this looks wrong > > or at least I don't understand from just a glance whether that > > generic_file_open() call makes sense. > > > > Illustrating how we get from opening /dev/binder to this call would > > help. > > Hmm. I wrote this by comparing with the ashmem open callback. Now that > you mention it you are right that Binder does not call > generic_file_open ... I have to admit that I don't know what > generic_file_open does. It's irrelevant for binder.
On Tue, Oct 01, 2024 at 08:22:22AM GMT, Alice Ryhl wrote: > Provide a `MiscDevice` trait that lets you specify the file operations > that you wish to provide for your misc device. For now, only three file > operations are provided: open, close, ioctl. > > These abstractions only support MISC_DYNAMIC_MINOR. This enforces that > new miscdevices should not hard-code a minor number. > > When implementing ioctl, the Result type is used. This means that you > can choose to return either of: > * An integer of type isize. > * An errno using the kernel::error::Error type. > When returning an isize, the integer is returned verbatim. It's mainly > intended for returning positive integers to userspace. However, it is > technically possible to return errors via the isize return value too. > > To avoid having a dependency on files, this patch does not provide the > file operations callbacks a pointer to the file. This means that they > cannot check file properties such as O_NONBLOCK (which Binder needs). > Support for that can be added as a follow-up. > > To avoid having a dependency on vma, this patch does not provide any way > to implement mmap (which Binder needs). Support for that can be added as > a follow-up. > > Rust Binder will use these abstractions to create the /dev/binder file > when binderfs is disabled. Do you really need to expose both CONFIG_BINDERFS and the hard-coded device creation stuff in the Kconfig that was done before that? Seems a bit pointless to me. > > Signed-off-by: Alice Ryhl <aliceryhl@google.com> > --- > rust/bindings/bindings_helper.h | 1 + > rust/kernel/lib.rs | 1 + > rust/kernel/miscdevice.rs | 241 ++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 243 insertions(+) > > diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h > index ae82e9c941af..84303bf221dd 100644 > --- a/rust/bindings/bindings_helper.h > +++ b/rust/bindings/bindings_helper.h > @@ -15,6 +15,7 @@ > #include <linux/firmware.h> > #include <linux/jiffies.h> > #include <linux/mdio.h> > +#include <linux/miscdevice.h> > #include <linux/phy.h> > #include <linux/refcount.h> > #include <linux/sched.h> > diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs > index 22a3bfa5a9e9..e268eae54c81 100644 > --- a/rust/kernel/lib.rs > +++ b/rust/kernel/lib.rs > @@ -39,6 +39,7 @@ > #[cfg(CONFIG_KUNIT)] > pub mod kunit; > pub mod list; > +pub mod miscdevice; > #[cfg(CONFIG_NET)] > pub mod net; > pub mod page; > diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs > new file mode 100644 > index 000000000000..cbd5249b5b45 > --- /dev/null > +++ b/rust/kernel/miscdevice.rs > @@ -0,0 +1,241 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +// Copyright (C) 2024 Google LLC. > + > +//! Miscdevice support. > +//! > +//! C headers: [`include/linux/miscdevice.h`](srctree/include/linux/miscdevice.h). > +//! > +//! Reference: <https://www.kernel.org/doc/html/latest/driver-api/misc_devices.html> > + > +use crate::{ > + bindings, > + error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR}, > + prelude::*, > + str::CStr, > + types::{ForeignOwnable, Opaque}, > +}; > +use core::{ > + ffi::{c_int, c_long, c_uint, c_ulong}, > + marker::PhantomData, > + mem::MaybeUninit, > + pin::Pin, > +}; > + > +/// Options for creating a misc device. > +#[derive(Copy, Clone)] > +pub struct MiscDeviceOptions { > + /// The name of the miscdevice. > + pub name: &'static CStr, > +} > + > +impl MiscDeviceOptions { > + /// Create a raw `struct miscdev` ready for registration. > + pub const fn into_raw<T: MiscDevice>(self) -> bindings::miscdevice { > + // SAFETY: All zeros is valid for this C type. > + let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() }; > + result.minor = bindings::MISC_DYNAMIC_MINOR as _; > + result.name = self.name.as_char_ptr(); > + result.fops = create_vtable::<T>(); > + result > + } > +} > + > +/// A registration of a miscdevice. > +/// > +/// # Invariants > +/// > +/// `inner` is a registered misc device. > +#[repr(transparent)] > +#[pin_data(PinnedDrop)] > +pub struct MiscDeviceRegistration<T> { > + #[pin] > + inner: Opaque<bindings::miscdevice>, > + _t: PhantomData<T>, > +} > + > +// SAFETY: It is allowed to call `misc_deregister` on a different thread from where you called > +// `misc_register`. > +unsafe impl<T> Send for MiscDeviceRegistration<T> {} > +// SAFETY: All `&self` methods on this type are written to ensure that it is safe to call them in > +// parallel. > +unsafe impl<T> Sync for MiscDeviceRegistration<T> {} > + > +impl<T: MiscDevice> MiscDeviceRegistration<T> { > + /// Register a misc device. > + pub fn register(opts: MiscDeviceOptions) -> impl PinInit<Self, Error> { > + try_pin_init!(Self { > + inner <- Opaque::try_ffi_init(move |slot: *mut bindings::miscdevice| { > + // SAFETY: The initializer can write to the provided `slot`. > + unsafe { slot.write(opts.into_raw::<T>()) }; > + > + // SAFETY: We just wrote the misc device options to the slot. The miscdevice will > + // get unregistered before `slot` is deallocated because the memory is pinned and > + // the destructor of this type deallocates the memory. > + // INVARIANT: If this returns `Ok(())`, then the `slot` will contain a registered > + // misc device. > + to_result(unsafe { bindings::misc_register(slot) }) > + }), > + _t: PhantomData, > + }) > + } > + > + /// Returns a raw pointer to the misc device. > + pub fn as_raw(&self) -> *mut bindings::miscdevice { > + self.inner.get() > + } > +} > + > +#[pinned_drop] > +impl<T> PinnedDrop for MiscDeviceRegistration<T> { > + fn drop(self: Pin<&mut Self>) { > + // SAFETY: We know that the device is registered by the type invariants. > + unsafe { bindings::misc_deregister(self.inner.get()) }; > + } > +} > + > +/// Trait implemented by the private data of an open misc device. > +#[vtable] > +pub trait MiscDevice { > + /// What kind of pointer should `Self` be wrapped in. > + type Ptr: ForeignOwnable + Send + Sync; > + > + /// Called when the misc device is opened. > + /// > + /// The returned pointer will be stored as the private data for the file. > + fn open() -> Result<Self::Ptr>; > + > + /// Called when the misc device is released. > + fn release(device: Self::Ptr) { > + drop(device); > + } > + > + /// Handler for ioctls. > + /// > + /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`]. > + /// > + /// [`kernel::ioctl`]: mod@crate::ioctl > + fn ioctl( > + _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>, > + _cmd: u32, > + _arg: usize, > + ) -> Result<isize> { > + kernel::build_error(VTABLE_DEFAULT_ERROR) > + } > + > + /// Handler for ioctls. > + /// > + /// Used for 32-bit userspace on 64-bit platforms. > + /// > + /// This method is optional and only needs to be provided if the ioctl relies on structures > + /// that have different layout on 32-bit and 64-bit userspace. If no implementation is > + /// provided, then `compat_ptr_ioctl` will be used instead. > + #[cfg(CONFIG_COMPAT)] > + fn compat_ioctl( > + _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>, > + _cmd: u32, > + _arg: usize, > + ) -> Result<isize> { > + kernel::build_error(VTABLE_DEFAULT_ERROR) > + } > +} > + > +const fn create_vtable<T: MiscDevice>() -> &'static bindings::file_operations { > + const fn maybe_fn<T: Copy>(check: bool, func: T) -> Option<T> { > + if check { > + Some(func) > + } else { > + None > + } > + } > + > + struct VtableHelper<T: MiscDevice> { > + _t: PhantomData<T>, > + } > + impl<T: MiscDevice> VtableHelper<T> { > + const VTABLE: bindings::file_operations = bindings::file_operations { > + open: Some(fops_open::<T>), > + release: Some(fops_release::<T>), > + unlocked_ioctl: maybe_fn(T::HAS_IOCTL, fops_ioctl::<T>), > + #[cfg(CONFIG_COMPAT)] > + compat_ioctl: if T::HAS_COMPAT_IOCTL { > + Some(fops_compat_ioctl::<T>) > + } else if T::HAS_IOCTL { > + Some(bindings::compat_ptr_ioctl) > + } else { > + None > + }, > + ..unsafe { MaybeUninit::zeroed().assume_init() } > + }; > + } > + > + &VtableHelper::<T>::VTABLE > +} > + > +unsafe extern "C" fn fops_open<T: MiscDevice>( > + inode: *mut bindings::inode, > + file: *mut bindings::file, > +) -> c_int { > + // SAFETY: The pointers are valid and for a file being opened. > + let ret = unsafe { bindings::generic_file_open(inode, file) }; > + if ret != 0 { > + return ret; > + } > + > + let ptr = match T::open() { > + Ok(ptr) => ptr, > + Err(err) => return err.to_errno(), > + }; > + > + // SAFETY: The open call of a file owns the private data. > + unsafe { (*file).private_data = ptr.into_foreign().cast_mut() }; > + > + 0 > +} > + > +unsafe extern "C" fn fops_release<T: MiscDevice>( > + _inode: *mut bindings::inode, > + file: *mut bindings::file, > +) -> c_int { > + // SAFETY: The release call of a file owns the private data. > + let private = unsafe { (*file).private_data }; > + // SAFETY: The release call of a file owns the private data. > + let ptr = unsafe { <T::Ptr as ForeignOwnable>::from_foreign(private) }; > + > + T::release(ptr); > + > + 0 > +} > + > +unsafe extern "C" fn fops_ioctl<T: MiscDevice>( > + file: *mut bindings::file, > + cmd: c_uint, > + arg: c_ulong, > +) -> c_long { > + // SAFETY: The ioctl call of a file can access the private data. > + let private = unsafe { (*file).private_data }; > + // SAFETY: Ioctl calls can borrow the private data of the file. > + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) }; > + > + match T::ioctl(device, cmd as u32, arg as usize) { > + Ok(ret) => ret as c_long, > + Err(err) => err.to_errno() as c_long, > + } > +} > + > +#[cfg(CONFIG_COMPAT)] > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>( > + file: *mut bindings::file, > + cmd: c_uint, > + arg: c_ulong, > +) -> c_long { > + // SAFETY: The compat ioctl call of a file can access the private data. > + let private = unsafe { (*file).private_data }; > + // SAFETY: Ioctl calls can borrow the private data of the file. > + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) }; > + > + match T::compat_ioctl(device, cmd as u32, arg as usize) { > + Ok(ret) => ret as c_long, > + Err(err) => err.to_errno() as c_long, > + } > +} > > -- > 2.46.1.824.gd892dcdcdd-goog >
On Tue, Oct 1, 2024, at 08:22, Alice Ryhl wrote: > +#[cfg(CONFIG_COMPAT)] > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>( > + file: *mut bindings::file, > + cmd: c_uint, > + arg: c_ulong, > +) -> c_long { > + // SAFETY: The compat ioctl call of a file can access the private > data. > + let private = unsafe { (*file).private_data }; > + // SAFETY: Ioctl calls can borrow the private data of the file. > + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) > }; > + > + match T::compat_ioctl(device, cmd as u32, arg as usize) { > + Ok(ret) => ret as c_long, > + Err(err) => err.to_errno() as c_long, > + } > +} I think this works fine as a 1:1 mapping of the C API, so this is certainly something we can do. On the other hand, it would be nice to improve the interface in some way and make it better than the C version. The changes that I think would be straightforward and helpful are: - combine native and compat handlers and pass a flag argument that the callback can check in case it has to do something special for compat mode - pass the 'arg' value as both a __user pointer and a 'long' value to avoid having to cast. This specifically simplifies the compat version since that needs different types of 64-bit extension for incoming 32-bit values. On top of that, my ideal implementation would significantly simplify writing safe ioctl handlers by using the information encoded in the command word: - copy the __user data into a kernel buffer for _IOW() and back for _IOR() type commands, or both for _IOWR() - check that the argument size matches the size of the structure it gets assigned to We have a couple of subsystems in the kernel that already do something like this, but they all do it differently. For newly written drivers in rust, we could try to do this well from the start and only offer a single reliable way to do it. For drivers implementing existing ioctl commands, an additional complication is that there are many command codes that encode incorrect size/direction data, or none at all. I don't know if there is a good way to do that last bit in rust, and even if there is, we may well decide to not do it at first in order to get something working. Arnd
On Wed, Oct 02, 2024 at 12:48:12PM GMT, Arnd Bergmann wrote: > On Tue, Oct 1, 2024, at 08:22, Alice Ryhl wrote: > > +#[cfg(CONFIG_COMPAT)] > > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>( > > + file: *mut bindings::file, > > + cmd: c_uint, > > + arg: c_ulong, > > +) -> c_long { > > + // SAFETY: The compat ioctl call of a file can access the private > > data. > > + let private = unsafe { (*file).private_data }; > > + // SAFETY: Ioctl calls can borrow the private data of the file. > > + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) > > }; > > + > > + match T::compat_ioctl(device, cmd as u32, arg as usize) { > > + Ok(ret) => ret as c_long, > > + Err(err) => err.to_errno() as c_long, > > + } > > +} > > I think this works fine as a 1:1 mapping of the C API, so this > is certainly something we can do. On the other hand, it would be > nice to improve the interface in some way and make it better than > the C version. > > The changes that I think would be straightforward and helpful are: > > - combine native and compat handlers and pass a flag argument > that the callback can check in case it has to do something > special for compat mode > > - pass the 'arg' value as both a __user pointer and a 'long' > value to avoid having to cast. This specifically simplifies > the compat version since that needs different types of > 64-bit extension for incoming 32-bit values. > > On top of that, my ideal implementation would significantly > simplify writing safe ioctl handlers by using the information > encoded in the command word: > > - copy the __user data into a kernel buffer for _IOW() > and back for _IOR() type commands, or both for _IOWR() > - check that the argument size matches the size of the > structure it gets assigned to - Handle versioning by size for ioctl()s correctly so stuff like: /* extensible ioctls */ switch (_IOC_NR(ioctl)) { case _IOC_NR(NS_MNT_GET_INFO): { struct mnt_ns_info kinfo = {}; struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; size_t usize = _IOC_SIZE(ioctl); if (ns->ops->type != CLONE_NEWNS) return -EINVAL; if (!uinfo) return -EINVAL; if (usize < MNT_NS_INFO_SIZE_VER0) return -EINVAL; return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); } This is not well-known and noone versions ioctl()s correctly and if they do it's their own hand-rolled thing. Ideally, this would be a first class concept with Rust bindings and versioning like this would be universally enforced. > > We have a couple of subsystems in the kernel that already > do something like this, but they all do it differently. > For newly written drivers in rust, we could try to do > this well from the start and only offer a single reliable > way to do it. For drivers implementing existing ioctl > commands, an additional complication is that there are > many command codes that encode incorrect size/direction > data, or none at all. > > I don't know if there is a good way to do that last bit > in rust, and even if there is, we may well decide to not > do it at first in order to get something working. > > Arnd
On Wed, Oct 2, 2024 at 3:24 PM Christian Brauner <brauner@kernel.org> wrote: > > On Wed, Oct 02, 2024 at 12:48:12PM GMT, Arnd Bergmann wrote: > > On Tue, Oct 1, 2024, at 08:22, Alice Ryhl wrote: > > > +#[cfg(CONFIG_COMPAT)] > > > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>( > > > + file: *mut bindings::file, > > > + cmd: c_uint, > > > + arg: c_ulong, > > > +) -> c_long { > > > + // SAFETY: The compat ioctl call of a file can access the private > > > data. > > > + let private = unsafe { (*file).private_data }; > > > + // SAFETY: Ioctl calls can borrow the private data of the file. > > > + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) > > > }; > > > + > > > + match T::compat_ioctl(device, cmd as u32, arg as usize) { > > > + Ok(ret) => ret as c_long, > > > + Err(err) => err.to_errno() as c_long, > > > + } > > > +} > > > > I think this works fine as a 1:1 mapping of the C API, so this > > is certainly something we can do. On the other hand, it would be > > nice to improve the interface in some way and make it better than > > the C version. > > > > The changes that I think would be straightforward and helpful are: > > > > - combine native and compat handlers and pass a flag argument > > that the callback can check in case it has to do something > > special for compat mode > > > > - pass the 'arg' value as both a __user pointer and a 'long' > > value to avoid having to cast. This specifically simplifies > > the compat version since that needs different types of > > 64-bit extension for incoming 32-bit values. > > > > On top of that, my ideal implementation would significantly > > simplify writing safe ioctl handlers by using the information > > encoded in the command word: > > > > - copy the __user data into a kernel buffer for _IOW() > > and back for _IOR() type commands, or both for _IOWR() > > - check that the argument size matches the size of the > > structure it gets assigned to > > - Handle versioning by size for ioctl()s correctly so stuff like: > > /* extensible ioctls */ > switch (_IOC_NR(ioctl)) { > case _IOC_NR(NS_MNT_GET_INFO): { > struct mnt_ns_info kinfo = {}; > struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; > size_t usize = _IOC_SIZE(ioctl); > > if (ns->ops->type != CLONE_NEWNS) > return -EINVAL; > > if (!uinfo) > return -EINVAL; > > if (usize < MNT_NS_INFO_SIZE_VER0) > return -EINVAL; > > return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); > } > > This is not well-known and noone versions ioctl()s correctly and if they > do it's their own hand-rolled thing. Ideally, this would be a first > class concept with Rust bindings and versioning like this would be > universally enforced. Could you point me at some more complete documentation or example of how to correctly do versioning? Alice
On Wed, Oct 02, 2024 at 03:36:33PM GMT, Alice Ryhl wrote: > On Wed, Oct 2, 2024 at 3:24 PM Christian Brauner <brauner@kernel.org> wrote: > > > > On Wed, Oct 02, 2024 at 12:48:12PM GMT, Arnd Bergmann wrote: > > > On Tue, Oct 1, 2024, at 08:22, Alice Ryhl wrote: > > > > +#[cfg(CONFIG_COMPAT)] > > > > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>( > > > > + file: *mut bindings::file, > > > > + cmd: c_uint, > > > > + arg: c_ulong, > > > > +) -> c_long { > > > > + // SAFETY: The compat ioctl call of a file can access the private > > > > data. > > > > + let private = unsafe { (*file).private_data }; > > > > + // SAFETY: Ioctl calls can borrow the private data of the file. > > > > + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) > > > > }; > > > > + > > > > + match T::compat_ioctl(device, cmd as u32, arg as usize) { > > > > + Ok(ret) => ret as c_long, > > > > + Err(err) => err.to_errno() as c_long, > > > > + } > > > > +} > > > > > > I think this works fine as a 1:1 mapping of the C API, so this > > > is certainly something we can do. On the other hand, it would be > > > nice to improve the interface in some way and make it better than > > > the C version. > > > > > > The changes that I think would be straightforward and helpful are: > > > > > > - combine native and compat handlers and pass a flag argument > > > that the callback can check in case it has to do something > > > special for compat mode > > > > > > - pass the 'arg' value as both a __user pointer and a 'long' > > > value to avoid having to cast. This specifically simplifies > > > the compat version since that needs different types of > > > 64-bit extension for incoming 32-bit values. > > > > > > On top of that, my ideal implementation would significantly > > > simplify writing safe ioctl handlers by using the information > > > encoded in the command word: > > > > > > - copy the __user data into a kernel buffer for _IOW() > > > and back for _IOR() type commands, or both for _IOWR() > > > - check that the argument size matches the size of the > > > structure it gets assigned to > > > > - Handle versioning by size for ioctl()s correctly so stuff like: > > > > /* extensible ioctls */ > > switch (_IOC_NR(ioctl)) { > > case _IOC_NR(NS_MNT_GET_INFO): { > > struct mnt_ns_info kinfo = {}; > > struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; > > size_t usize = _IOC_SIZE(ioctl); > > > > if (ns->ops->type != CLONE_NEWNS) > > return -EINVAL; > > > > if (!uinfo) > > return -EINVAL; > > > > if (usize < MNT_NS_INFO_SIZE_VER0) > > return -EINVAL; > > > > return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); > > } > > > > This is not well-known and noone versions ioctl()s correctly and if they > > do it's their own hand-rolled thing. Ideally, this would be a first > > class concept with Rust bindings and versioning like this would be > > universally enforced. > > Could you point me at some more complete documentation or example of > how to correctly do versioning? So I don't want you to lead astray so if this is out of reach for now I understand but basically we do have the concept of versioning structs by size. So I'm taking an example from the mount_setattr() man page though openat2() would also work: Extensibility In order to allow for future extensibility, mount_setattr() requires the user-space application to specify the size of the mount_attr structure that it is passing. By providing this information, it is possible for mount_setattr() to provide both forwards- and backwards-compatibility, with size acting as an implicit version number. (Because new extension fields will always be appended, the structure size will always increase.) This extensibility design is very similar to other system calls such as perf_setattr(2), perf_event_open(2), clone3(2) and openat2(2). Let usize be the size of the structure as specified by the user-space application, and let ksize be the size of the structure which the kernel supports, then there are three cases to consider: • If ksize equals usize, then there is no version mismatch and attr can be used verbatim. • If ksize is larger than usize, then there are some extension fields that the kernel supports which the user-space application is unaware of. Because a zero value in any added extension field signifies a no-op, the kernel treats all of the extension fields not provided by the user-space application as having zero values. This provides backwards-compatibility. • If ksize is smaller than usize, then there are some extension fields which the user-space application is aware of but which the kernel does not support. Because any extension field must have its zero values signify a no-op, the kernel can safely ignore the unsupported extension fields if they are all zero. If any unsupported extension fields are non-zero, then -1 is returned and errno is set to E2BIG. This provides forwards-compatibility. [...] In essence ioctl()s are already versioned by size because the size of the passed argument is encoded in the ioctl cmd: struct my_struct { __u64 a; }; ioctl(fd, MY_IOCTL, &my_struct); then _IOC_SIZE(MY_IOCTL) will give you the expected size. If the kernel extends the struct to: struct my_struct { __u64 a; __u64 b; }; then the value of MY_IOCTL changes. Most code currently cannot deal with such an extension because it's coded as a simple switch on the ioctl command: switch (cmd) { case MY_IOCTL: /* do something */ break; } So on an older kernel the ioctl would now fail because it won't be able to handle MY_STRUCT with an increased struct my_struct size because the switch wouldn't trigger. The correct way to handle this is to grab the actual ioctl number out from the ioctl command: switch (_IOC_NR(cmd)) { case _IOC_NR(MY_STRUCT): { and then grab the size of the ioctl: size_t usize = _IOC_SIZE(ioctl); perform sanity checks: // garbage if (usize < MY_STRUCT_SIZE_VER0) return -EINVAL; // ¿qué? if (usize > PAGE_SIZE) return -EINVAL; and then copy the stuff via copy_struct_from_user() or copy back out to user via other means. This way you can safely extend ioctl()s in a backward and forward compatible manner and if we can enforce this for new drivers then I think that's what we should do.
On Wed, Oct 2, 2024, at 14:23, Christian Brauner wrote: > and then copy the stuff via copy_struct_from_user() or copy back out to > user via other means. > > This way you can safely extend ioctl()s in a backward and forward > compatible manner and if we can enforce this for new drivers then I > think that's what we should do. I don't see much value in building generic code for ioctl around this specific variant of extensibility. Extending ioctl commands by having a larger structure that results in a new cmd code constant is fine, but there is little difference between doing this with the same or a different 'nr' value. Most drivers just always use a new nr here, and I see no reason to discourage that. There is actually a small risk in your example where it can break if you have the same size between native and compat variants of the same command, like struct old { long a; }; struct new { long a; int b; }; Here, the 64-bit 'old' has the same size as the 32-bit 'new', so if we try to handle them in a shared native/compat ioctl function, this needs an extra in_conmpat_syscall() check that adds complexity and is easy to forget. Arnd
On Wed, Oct 02, 2024 at 03:45:08PM GMT, Arnd Bergmann wrote: > On Wed, Oct 2, 2024, at 14:23, Christian Brauner wrote: > > > and then copy the stuff via copy_struct_from_user() or copy back out to > > user via other means. > > > > This way you can safely extend ioctl()s in a backward and forward > > compatible manner and if we can enforce this for new drivers then I > > think that's what we should do. > > I don't see much value in building generic code for ioctl around > this specific variant of extensibility. Extending ioctl commands > by having a larger structure that results in a new cmd code > constant is fine, but there is little difference between doing > this with the same or a different 'nr' value. Most drivers just > always use a new nr here, and I see no reason to discourage that. > > There is actually a small risk in your example where it can > break if you have the same size between native and compat > variants of the same command, like > > struct old { > long a; > }; > > struct new { > long a; > int b; > }; > > Here, the 64-bit 'old' has the same size as the 32-bit 'new', > so if we try to handle them in a shared native/compat ioctl > function, this needs an extra in_conmpat_syscall() check that > adds complexity and is easy to forget. This presupposes that we will have Rust drivers - not C drivers - that define structs like it's 1990. You yourself and me included try to enforce that structs are correctly aligned and padded. So I see this as a non-argument. We wouldn't let this slide in new system calls so I don't see why we would in new ioctls.
On Wed, Oct 02, 2024 at 03:45:08PM +0000, Arnd Bergmann wrote: > On Wed, Oct 2, 2024, at 14:23, Christian Brauner wrote: > > > and then copy the stuff via copy_struct_from_user() or copy back out to > > user via other means. > > > > This way you can safely extend ioctl()s in a backward and forward > > compatible manner and if we can enforce this for new drivers then I > > think that's what we should do. > > I don't see much value in building generic code for ioctl around > this specific variant of extensibility. Extending ioctl commands > by having a larger structure that results in a new cmd code > constant is fine, but there is little difference between doing > this with the same or a different 'nr' value. Most drivers just > always use a new nr here, and I see no reason to discourage that. > > There is actually a small risk in your example where it can > break if you have the same size between native and compat > variants of the same command, like > > struct old { > long a; > }; > > struct new { > long a; > int b; > }; > > Here, the 64-bit 'old' has the same size as the 32-bit 'new', > so if we try to handle them in a shared native/compat ioctl > function, this needs an extra in_conmpat_syscall() check that > adds complexity and is easy to forget. Agreed, "extending" ioctls is considered a bad thing and it's just easier to create a new one. Or use some flags and reserved fields, if you remember to add them in the beginning... Anyway, this is all great, but for now, I'll take this series in my tree and we can add onto it from there. I'll dig up some sample code that uses this too, so that we make sure it works properly. Give me a few days to catch up before it lands in my trees... thanks, greg k-h
On Wed, Oct 02, 2024 at 06:04:38PM GMT, Greg Kroah-Hartman wrote: > On Wed, Oct 02, 2024 at 03:45:08PM +0000, Arnd Bergmann wrote: > > On Wed, Oct 2, 2024, at 14:23, Christian Brauner wrote: > > > > > and then copy the stuff via copy_struct_from_user() or copy back out to > > > user via other means. > > > > > > This way you can safely extend ioctl()s in a backward and forward > > > compatible manner and if we can enforce this for new drivers then I > > > think that's what we should do. > > > > I don't see much value in building generic code for ioctl around > > this specific variant of extensibility. Extending ioctl commands > > by having a larger structure that results in a new cmd code > > constant is fine, but there is little difference between doing > > this with the same or a different 'nr' value. Most drivers just > > always use a new nr here, and I see no reason to discourage that. > > > > There is actually a small risk in your example where it can > > break if you have the same size between native and compat > > variants of the same command, like > > > > struct old { > > long a; > > }; > > > > struct new { > > long a; > > int b; > > }; > > > > Here, the 64-bit 'old' has the same size as the 32-bit 'new', > > so if we try to handle them in a shared native/compat ioctl > > function, this needs an extra in_conmpat_syscall() check that > > adds complexity and is easy to forget. > > Agreed, "extending" ioctls is considered a bad thing and it's just > easier to create a new one. Or use some flags and reserved fields, if > you remember to add them in the beginning... This statement misses the reality of what has been happening outside of arbitrary drivers for years. Let alone that it simply asserts that it's a bad thing.
On Wed, Oct 2, 2024, at 16:04, Greg Kroah-Hartman wrote: > On Wed, Oct 02, 2024 at 03:45:08PM +0000, Arnd Bergmann wrote: >> On Wed, Oct 2, 2024, at 14:23, Christian Brauner wrote: >> >> Here, the 64-bit 'old' has the same size as the 32-bit 'new', >> so if we try to handle them in a shared native/compat ioctl >> function, this needs an extra in_conmpat_syscall() check that >> adds complexity and is easy to forget. > > Agreed, "extending" ioctls is considered a bad thing and it's just > easier to create a new one. Or use some flags and reserved fields, if > you remember to add them in the beginning... > > Anyway, this is all great, but for now, I'll take this series in my tree > and we can add onto it from there. I'll dig up some sample code that > uses this too, so that we make sure it works properly. Give me a few > days to catch up before it lands in my trees... Sounds good to me, it's clear we don't get a quick solution and there is nothing stopping us from revisiting this after we have a couple of drivers using ioctl. Arnd
On Wed, Oct 2, 2024 at 2:48 PM Arnd Bergmann <arnd@arndb.de> wrote: > > On Tue, Oct 1, 2024, at 08:22, Alice Ryhl wrote: > > +#[cfg(CONFIG_COMPAT)] > > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>( > > + file: *mut bindings::file, > > + cmd: c_uint, > > + arg: c_ulong, > > +) -> c_long { > > + // SAFETY: The compat ioctl call of a file can access the private > > data. > > + let private = unsafe { (*file).private_data }; > > + // SAFETY: Ioctl calls can borrow the private data of the file. > > + let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) > > }; > > + > > + match T::compat_ioctl(device, cmd as u32, arg as usize) { > > + Ok(ret) => ret as c_long, > > + Err(err) => err.to_errno() as c_long, > > + } > > +} > > I think this works fine as a 1:1 mapping of the C API, so this > is certainly something we can do. On the other hand, it would be > nice to improve the interface in some way and make it better than > the C version. > > The changes that I think would be straightforward and helpful are: > > - combine native and compat handlers and pass a flag argument > that the callback can check in case it has to do something > special for compat mode > > - pass the 'arg' value as both a __user pointer and a 'long' > value to avoid having to cast. This specifically simplifies > the compat version since that needs different types of > 64-bit extension for incoming 32-bit values. > > On top of that, my ideal implementation would significantly > simplify writing safe ioctl handlers by using the information > encoded in the command word: > > - copy the __user data into a kernel buffer for _IOW() > and back for _IOR() type commands, or both for _IOWR() > - check that the argument size matches the size of the > structure it gets assigned to > > We have a couple of subsystems in the kernel that already > do something like this, but they all do it differently. > For newly written drivers in rust, we could try to do > this well from the start and only offer a single reliable > way to do it. For drivers implementing existing ioctl > commands, an additional complication is that there are > many command codes that encode incorrect size/direction > data, or none at all. > > I don't know if there is a good way to do that last bit > in rust, and even if there is, we may well decide to not > do it at first in order to get something working. A quick sketch. One option is to do something along these lines: struct IoctlParams { pub cmd: u32, pub arg: usize, } impl IoctlParams { fn user_slice(&self) -> IoctlUser { let userslice = UserSlice::new(self.arg, _IOC_SIZE(self.cmd)); match _IOC_DIR(self.cmd) { _IOC_READ => IoctlParams::Read(userslice.reader()), _IOC_WRITE => IoctlParams::Write(userslice.writer()), _IOC_READ|_IOC_WRITE => IoctlParams::WriteRead(userslice), _ => unreachable!(), } } } enum IoctlUser { Read(UserSliceReader), Write(UserSliceWriter), WriteRead(UserSlice), } Then ioctl implementations can use a match statement like this: match ioc_params.user_slice() { IoctlUser::Read(slice) => {}, IoctlUser::Write(slice) => {}, IoctlUser::WriteRead(slice) => {}, } Where each branch of the match handles that case. Alice
On Tue, Oct 1, 2024 at 10:22 AM Alice Ryhl <aliceryhl@google.com> wrote: > +impl<T: MiscDevice> MiscDeviceRegistration<T> { > + /// Register a misc device. > + pub fn register(opts: MiscDeviceOptions) -> impl PinInit<Self, Error> { > + try_pin_init!(Self { > + inner <- Opaque::try_ffi_init(move |slot: *mut bindings::miscdevice| { > + // SAFETY: The initializer can write to the provided `slot`. > + unsafe { slot.write(opts.into_raw::<T>()) }; > + > + // SAFETY: We just wrote the misc device options to the slot. The miscdevice will > + // get unregistered before `slot` is deallocated because the memory is pinned and > + // the destructor of this type deallocates the memory. > + // INVARIANT: If this returns `Ok(())`, then the `slot` will contain a registered > + // misc device. > + to_result(unsafe { bindings::misc_register(slot) }) > + }), > + _t: PhantomData, > + }) > + } Note that right now this can only be used in the module init function if the registration is stored in a pinned box. We need the in-place initialization change to fix that. Does anyone want to revive the in-place initialization patch? Link: https://lore.kernel.org/rust-for-linux/20240328195457.225001-1-wedsonaf@gmail.com/ Alice
On 01.10.2024 10:22, Alice Ryhl wrote: .... > diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs > new file mode 100644 > index 000000000000..cbd5249b5b45 > --- /dev/null > +++ b/rust/kernel/miscdevice.rs ... > +/// Trait implemented by the private data of an open misc device. > +#[vtable] > +pub trait MiscDevice { > + /// What kind of pointer should `Self` be wrapped in. > + type Ptr: ForeignOwnable + Send + Sync; > + > + /// Called when the misc device is opened. > + /// > + /// The returned pointer will be stored as the private data for the file. > + fn open() -> Result<Self::Ptr>; > + > + /// Called when the misc device is released. > + fn release(device: Self::Ptr) { > + drop(device); > + } > + > + /// Handler for ioctls. > + /// > + /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`]. Nit: utilties -> utilities Dirk
On Tue, Oct 1, 2024 at 10:54 AM Dirk Behme <dirk.behme@de.bosch.com> wrote: > > On 01.10.2024 10:22, Alice Ryhl wrote: > .... > > diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs > > new file mode 100644 > > index 000000000000..cbd5249b5b45 > > --- /dev/null > > +++ b/rust/kernel/miscdevice.rs > ... > > +/// Trait implemented by the private data of an open misc device. > > +#[vtable] > > +pub trait MiscDevice { > > + /// What kind of pointer should `Self` be wrapped in. > > + type Ptr: ForeignOwnable + Send + Sync; > > + > > + /// Called when the misc device is opened. > > + /// > > + /// The returned pointer will be stored as the private data for the file. > > + fn open() -> Result<Self::Ptr>; > > + > > + /// Called when the misc device is released. > > + fn release(device: Self::Ptr) { > > + drop(device); > > + } > > + > > + /// Handler for ioctls. > > + /// > > + /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`]. > > Nit: utilties -> utilities Thanks! Alice
© 2016 - 2024 Red Hat, Inc.