Add a short exercise for Rust's per-CPU variable API, modelled after
lib/percpu_test.c
Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
---
lib/Kconfig.debug | 9 ++++
lib/Makefile | 1 +
lib/percpu_test_rust.rs | 120 ++++++++++++++++++++++++++++++++++++++++++++++++
rust/helpers/percpu.c | 11 +++++
4 files changed, 141 insertions(+)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ebe33181b6e6..959ce156c601 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2418,6 +2418,15 @@ config PERCPU_TEST
 
 	  If unsure, say N.
 
+config PERCPU_TEST_RUST
+	tristate "Rust per cpu operations test"
+	depends on m && DEBUG_KERNEL && RUST
+	help
+	  Enable this option to build a test module which validates Rust per-cpu
+	  operations.
+
+	  If unsure, say N.
+
 config ATOMIC64_SELFTEST
 	tristate "Perform an atomic64_t self-test"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index c38582f187dd..ab19106cc22c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -288,6 +288,7 @@ obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o
obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o
obj-$(CONFIG_PERCPU_TEST) += percpu_test.o
+obj-$(CONFIG_PERCPU_TEST_RUST) += percpu_test_rust.o
obj-$(CONFIG_ASN1) += asn1_decoder.o
obj-$(CONFIG_ASN1_ENCODER) += asn1_encoder.o
diff --git a/lib/percpu_test_rust.rs b/lib/percpu_test_rust.rs
new file mode 100644
index 000000000000..a9652e6ece08
--- /dev/null
+++ b/lib/percpu_test_rust.rs
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+//! A simple self-test for the Rust per-CPU API.
+
+use core::ffi::c_void;
+
+use kernel::{
+    bindings::{on_each_cpu, smp_processor_id},
+    define_per_cpu,
+    percpu::{cpu_guard::*, *},
+    pr_info,
+    prelude::*,
+    unsafe_get_per_cpu,
+};
+
+module! {
+    type: PerCpuTestModule,
+    name: "percpu_test_rust",
+    author: "Mitchell Levy",
+    description: "Test code to exercise the Rust Per CPU variable API",
+    license: "GPL v2",
+}
+
+struct PerCpuTestModule;
+
+define_per_cpu!(PERCPU: i64 = 0);
+define_per_cpu!(UPERCPU: u64 = 0);
+
+impl kernel::Module for PerCpuTestModule {
+    fn init(_module: &'static ThisModule) -> Result<Self, Error> {
+        pr_info!("rust percpu test start\n");
+
+        let mut native: i64 = 0;
+        // SAFETY: PERCPU is properly defined
+        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
+        // SAFETY: We only have one PerCpu that points at PERCPU
+        unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut i64| {
+            pr_info!("The contents of pcpu are {}\n", val);
+
+            native += -1;
+            *val += -1;
+            pr_info!("Native: {}, *pcpu: {}\n", native, val);
+            assert!(native == *val && native == -1);
+
+            native += 1;
+            *val += 1;
+            pr_info!("Native: {}, *pcpu: {}\n", native, val);
+            assert!(native == *val && native == 0);
+        });
+
+        let mut unative: u64 = 0;
+        // SAFETY: UPERCPU is properly defined
+        let mut upcpu: StaticPerCpu<u64> = unsafe { unsafe_get_per_cpu!(UPERCPU) };
+
+        // SAFETY: We only have one PerCpu pointing at UPERCPU
+        unsafe { upcpu.get(CpuGuard::new()) }.with(|val: &mut u64| {
+            unative += 1;
+            *val += 1;
+            pr_info!("Unative: {}, *upcpu: {}\n", unative, val);
+            assert!(unative == *val && unative == 1);
+
+            unative = unative.wrapping_add((-1i64) as u64);
+            *val = val.wrapping_add((-1i64) as u64);
+            pr_info!("Unative: {}, *upcpu: {}\n", unative, val);
+            assert!(unative == *val && unative == 0);
+
+            unative = unative.wrapping_add((-1i64) as u64);
+            *val = val.wrapping_add((-1i64) as u64);
+            pr_info!("Unative: {}, *upcpu: {}\n", unative, val);
+            assert!(unative == *val && unative == (-1i64) as u64);
+
+            unative = 0;
+            *val = 0;
+
+            unative = unative.wrapping_sub(1);
+            *val = val.wrapping_sub(1);
+            pr_info!("Unative: {}, *upcpu: {}\n", unative, val);
+            assert!(unative == *val && unative == (-1i64) as u64);
+            assert!(unative == *val && unative == u64::MAX);
+        });
+
+        pr_info!("rust static percpu test done\n");
+
+        pr_info!("rust dynamic percpu test start\n");
+        let mut test: DynamicPerCpu<u64> = DynamicPerCpu::new(GFP_KERNEL).unwrap();
+
+        // SAFETY: No prerequisites for on_each_cpu.
+        unsafe {
+            on_each_cpu(Some(inc_percpu), (&raw mut test) as *mut c_void, 0);
+            on_each_cpu(Some(inc_percpu), (&raw mut test) as *mut c_void, 0);
+            on_each_cpu(Some(inc_percpu), (&raw mut test) as *mut c_void, 0);
+            on_each_cpu(Some(inc_percpu), (&raw mut test) as *mut c_void, 1);
+            on_each_cpu(Some(check_percpu), (&raw mut test) as *mut c_void, 1);
+        }
+
+        pr_info!("rust dynamic percpu test done\n");
+
+        // Return Err to unload the module
+        Result::Err(EINVAL)
+    }
+}
+
+extern "C" fn inc_percpu(info: *mut c_void) {
+    // SAFETY: We know that info is a void *const DynamicPerCpu<u64> and DynamicPerCpu<u64> is Send.
+    let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<u64>)).clone() };
+    // SAFETY: smp_processor_id has no preconditions
+    pr_info!("Incrementing on {}\n", unsafe { smp_processor_id() });
+
+    // SAFETY: We don't have multiple clones of pcpu in scope
+    unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut u64| *val += 1);
+}
+
+extern "C" fn check_percpu(info: *mut c_void) {
+    // SAFETY: We know that info is a void *const DynamicPerCpu<u64> and DynamicPerCpu<u64> is Send.
+    let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<u64>)).clone() };
+    // SAFETY: smp_processor_id has no preconditions
+    pr_info!("Asserting on {}\n", unsafe { smp_processor_id() });
+
+    // SAFETY: We don't have multiple clones of pcpu in scope
+    unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut u64| assert!(*val == 4));
+}
diff --git a/rust/helpers/percpu.c b/rust/helpers/percpu.c
index a091389f730f..0e9b2fed3ebd 100644
--- a/rust/helpers/percpu.c
+++ b/rust/helpers/percpu.c
@@ -1,9 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/percpu.h>
+#include <linux/smp.h>
 void __percpu *rust_helper_alloc_percpu(size_t sz, size_t align)
 {
 	return __alloc_percpu(sz, align);
 }
+void rust_helper_on_each_cpu(smp_call_func_t func, void *info, int wait)
+{
+	on_each_cpu(func, info, wait);
+}
+
+int rust_helper_smp_processor_id(void)
+{
+	return smp_processor_id();
+}
+
--
2.34.1
On Sat Jul 12, 2025 at 11:31 PM CEST, Mitchell Levy wrote:
> Add a short exercise for Rust's per-CPU variable API, modelled after
> lib/percpu_test.c
>
> Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
> ---
>  lib/Kconfig.debug | 9 ++++
>  lib/Makefile | 1 +
>  lib/percpu_test_rust.rs | 120 ++++++++++++++++++++++++++++++++++++++++++++++++

I don't know if this is the correct place, the code looks much more like
a sample, so why not place it there instead?

>  rust/helpers/percpu.c | 11 +++++
>  4 files changed, 141 insertions(+)
[...]
> +        let mut native: i64 = 0;
> +        // SAFETY: PERCPU is properly defined
> +        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };

I don't understand why we need unsafe here, can't we just create
something specially in the `define_per_cpu` macro that is then confirmed
by the `get_per_cpu!` macro and thus it can be safe?

> +        // SAFETY: We only have one PerCpu that points at PERCPU
> +        unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut i64| {

Hmm I also don't like the unsafe part here...

Can't we use the same API that `thread_local!` in the standard library
has:

    https://doc.rust-lang.org/std/macro.thread_local.html

So in this example you would store a `Cell<i64>` instead.

I'm not familiar with per CPU variables, but if you're usually storing
`Copy` types, then this is much better wrt not having unsafe code
everywhere.

If one also often stores `!Copy` types, then we might be able to get
away with `RefCell`, but that's a small runtime overhead -- which is
probably bad given that per cpu variables are most likely used for
performance reasons? In that case the user might just need to store
`UnsafeCell` and use unsafe regardless. (or we invent something
specifically for that case, eg tokens that are statically known to be
unique etc)

---
Cheers,
Benno
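For reference, the `thread_local!` pattern pointed at above looks like
this in plain std Rust; the suggestion is that a per-CPU accessor could
expose the same `with`-closure shape, with `Cell` providing interior
mutability so the use site needs no `unsafe`. This is only an
illustration of the std API, not kernel code:

    use std::cell::Cell;

    thread_local! {
        // Interior mutability via `Cell`, so no `&mut` and no `unsafe`
        // are needed at the use site.
        static COUNTER: Cell<i64> = Cell::new(0);
    }

    fn main() {
        COUNTER.with(|c| c.set(c.get() + 1));
        COUNTER.with(|c| println!("counter = {}", c.get()));
    }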
On Sun, Jul 13, 2025 at 11:30:31AM +0200, Benno Lossin wrote:
> I don't know if this is the correct place, the code looks much more like
> a sample, so why not place it there instead?

I don't feel particularly strongly either way --- I defaulted to `lib/`
since that's where the `percpu_test.c` I was working off of is located.
Happy to change for v3

[...]
> I don't understand why we need unsafe here, can't we just create
> something specially in the `define_per_cpu` macro that is then confirmed
> by the `get_per_cpu!` macro and thus it can be safe?

As is, something like

    define_per_cpu!(PERCPU: i32 = 0);

    fn func() {
        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
    }

will compile, but any usage of `pcpu` will be UB. This is because
`unsafe_get_per_cpu!` is just blindly casting pointers and, as far as I
know, the compiler does not do any checking of pointer casts. If you
have thoughts/ideas on how to get around this problem, I'd certainly
*like* to provide a safe API here :)

> Can't we use the same API that `thread_local!` in the standard library
> has:
>
>     https://doc.rust-lang.org/std/macro.thread_local.html
>
> So in this example you would store a `Cell<i64>` instead.
>
> I'm not familiar with per CPU variables, but if you're usually storing
> `Copy` types, then this is much better wrt not having unsafe code
> everywhere.
>
> If one also often stores `!Copy` types, then we might be able to get
> away with `RefCell`, but that's a small runtime overhead -- which is
> probably bad given that per cpu variables are most likely used for
> performance reasons? In that case the user might just need to store
> `UnsafeCell` and use unsafe regardless. (or we invent something
> specifically for that case, eg tokens that are statically known to be
> unique etc)

I'm open to including a specialization for `T: Copy` in a similar vein
to what I have here for numeric types. Off the top of my head, that
shouldn't require any user-facing `unsafe`. But yes, I believe there is
a significant amount of interest in having `!Copy` per-CPU variables.
(At least, I'm interested in having them around for experimenting with
using Rust for HV drivers.)

I would definitely like to avoid *requiring* the use of `RefCell` since,
as you mention, it does have a runtime overhead. Per-CPU variables can
be used for "logical" reasons rather than just as a performance
optimization, so there might be some cases where paying the runtime
overhead is ok. But that's certainly not true in all cases. That said,
perhaps there could be a safely obtainable token type that only passes a
`&T` (rather than a `&mut T`) to its closure, and then if a user doesn't
mind the runtime overhead, they can choose `T` to be a `RefCell`.
Thoughts?

For `UnsafeCell`, if a user of the API were to have something like a
`PerCpu<UnsafeCell<T>>` that safely spits out a `&UnsafeCell<T>`, my
understanding is that mutating the underlying `T` would require the
exact same safety guarantees as what's here, except now it'd need a much
bigger unsafe block and would have to do all of its manipulations via
pointers. That seems like a pretty big ergonomics burden without a clear
(to me) benefit.

> ---
> Cheers,
> Benno
On Tue Jul 15, 2025 at 12:31 PM CEST, Mitchell Levy wrote:
> On Sun, Jul 13, 2025 at 11:30:31AM +0200, Benno Lossin wrote:
>> I don't know if this is the correct place, the code looks much more like
>> a sample, so why not place it there instead?
>
> I don't feel particularly strongly either way --- I defaulted to `lib/`
> since that's where the `percpu_test.c` I was working off of is located.
> Happy to change for v3

Since we don't have Rust stuff in lib/ yet (and that the code looks much
more like the samples we already have) I think putting it in
samples/rust is better.

[...]
> As is, something like
>
>     define_per_cpu!(PERCPU: i32 = 0);
>
>     fn func() {
>         let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
>     }
>
> will compile, but any usage of `pcpu` will be UB. This is because
> `unsafe_get_per_cpu!` is just blindly casting pointers and, as far as I
> know, the compiler does not do any checking of pointer casts. If you
> have thoughts/ideas on how to get around this problem, I'd certainly
> *like* to provide a safe API here :)

I haven't taken a look at your implementation, but you do have the type
declared in `define_per_cpu!`, so it's a bit of a mystery to me why you
can't get that out in `unsafe_get_per_cpu!`...

Maybe in a few weeks I'll be able to take a closer look.

> I'm open to including a specialization for `T: Copy` in a similar vein
> to what I have here for numeric types. Off the top of my head, that
> shouldn't require any user-facing `unsafe`. But yes, I believe there is
> a significant amount of interest in having `!Copy` per-CPU variables.
> (At least, I'm interested in having them around for experimenting with
> using Rust for HV drivers.)

What kinds of types would you like to store? Allocations? Just integers
in bigger structs? Mutexes?

> I would definitely like to avoid *requiring* the use of `RefCell` since,
> as you mention, it does have a runtime overhead. Per-CPU variables can
> be used for "logical" reasons rather than just as a performance
> optimization, so there might be some cases where paying the runtime
> overhead is ok. But that's certainly not true in all cases. That said,
> perhaps there could be a safely obtainable token type that only passes a
> `&T` (rather than a `&mut T`) to its closure, and then if a user doesn't
> mind the runtime overhead, they can choose `T` to be a `RefCell`.
> Thoughts?

So I think using an API similar to `thread_local!` will allow us to have
multiple other APIs that slot into that. `Cell<T>` for `T: Copy`,
`RefCell<T>` for cases where you don't care about the runtime overhead,
plain `T` for cases where you only need `&T`. For the case where you
need `&mut T`, we could have something like a `TokenCell<T>` that gives
out a token that you need to mutably borrow in order to get `&mut T`.
Finally for anything else that is too restricted by this, users can also
use `UnsafeCell<T>` although that requires `unsafe`.

I think the advantage of this is that the common cases are all safe and
very idiomatic. In the current design, you *always* have to use unsafe.

> For `UnsafeCell`, if a user of the API were to have something like a
> `PerCpu<UnsafeCell<T>>` that safely spits out a `&UnsafeCell<T>`, my
> understanding is that mutating the underlying `T` would require the
> exact same safety guarantees as what's here, except now it'd need a much
> bigger unsafe block and would have to do all of its manipulations via
> pointers. That seems like a pretty big ergonomics burden without a clear
> (to me) benefit.

It would require the same amount of unsafe & safety comments, but it
wouldn't be bigger comments, since you can just as well create `&mut T`
to the value.

---
Cheers,
Benno
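To make the "slot into the same API" idea concrete with the std
analogue again (plain Rust, nothing kernel-specific): a `!Copy` value
can sit behind `RefCell` in the same `with`-closure shape, trading a
runtime borrow check for safety, while the hypothetical `TokenCell`
mentioned above would fill the same slot with a compile-time check
instead:

    use std::cell::RefCell;

    thread_local! {
        // `Vec<u64>` is !Copy; `RefCell` provides checked mutability
        // at a small runtime cost, in the same shape as `Cell` above.
        static SAMPLES: RefCell<Vec<u64>> = RefCell::new(Vec::new());
    }

    fn record(v: u64) {
        SAMPLES.with(|s| s.borrow_mut().push(v));
    }

    fn main() {
        record(1);
        record(2);
        SAMPLES.with(|s| println!("recorded {} samples", s.borrow().len()));
    }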
On Tue, Jul 15, 2025 at 01:31:06PM +0200, Benno Lossin wrote:
[...]
> >> Hmm I also don't like the unsafe part here...
> >>
> >> Can't we use the same API that `thread_local!` in the standard library
> >> has:
> >>
> >>     https://doc.rust-lang.org/std/macro.thread_local.html

First of all, `thread_local!` has to be implemented by some sys-specific
unsafe mechanism, right? For example on unix, I think it's using
pthread_key_t:

    https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html

what we are implementing (or wrapping) is the very basic unsafe
mechanism for percpu here. Surely we can explore the design for a safe
API, but the unsafe mechanism is probably necessary to look into at
first.

> >> If one also often stores `!Copy` types, then we might be able to get
> >> away with `RefCell`, but that's a small runtime overhead -- which is
> >> probably bad given that per cpu variables are most likely used for
> >> performance reasons? In that case the user might just need to store
> >> `UnsafeCell` and use unsafe regardless. (or we invent something
> >> specifically for that case, eg tokens that are statically known to be
> >> unique etc)

This sounds reasonable to me.

> What kinds of types would you like to store? Allocations? Just integers
> in bigger structs? Mutexes?

In the VMBus driver, there is a percpu work_struct.

> So I think using an API similar to `thread_local!` will allow us to have
> multiple other APIs that slot into that. `Cell<T>` for `T: Copy`,
> `RefCell<T>` for cases where you don't care about the runtime overhead,
> plain `T` for cases where you only need `&T`. For the case where you
> need `&mut T`, we could have something like a `TokenCell<T>` that gives
> out a token that you need to mutably borrow in order to get `&mut T`.
> Finally for anything else that is too restricted by this, users can also
> use `UnsafeCell<T>` although that requires `unsafe`.
>
> I think the advantage of this is that the common cases are all safe and
> very idiomatic. In the current design, you *always* have to use unsafe.

I agree, but like I said, we need to figure out the unsafe interface
that C already uses and build API upon it. I think focusing on the
unsafe mechanism may be the way to start: you cannot implement something
that cannot be implemented, and we don't have the magic pthread_key here
;-)

Regards,
Boqun
On Tue Jul 15, 2025 at 4:10 PM CEST, Boqun Feng wrote:
> On Tue, Jul 15, 2025 at 01:31:06PM +0200, Benno Lossin wrote:
[...]
> First of all, `thread_local!` has to be implemented by some sys-specific
> unsafe mechanism, right? For example on unix, I think it's using
> pthread_key_t:
>
>     https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
>
> what we are implementing (or wrapping) is the very basic unsafe
> mechanism for percpu here. Surely we can explore the design for a safe
> API, but the unsafe mechanism is probably necessary to look into at
> first.

But this is intended to be used by drivers, right? If so, then we should
do our usual due diligence and work out a safe abstraction. Only fall
back to unsafe if it isn't possible.

I'm not familiar with percpu, but from the name I assumed that it's
"just a variable for each cpu" so similar to `thread_local!`, but it's
bound to the specific cpu instead of the thread.

That in my mind should be rather easy to support in Rust at least with
the thread_local-style API. You just need to ensure that no reference
can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
to detect context switches.

> In the VMBus driver, there is a percpu work_struct.

Do you have a link? Or better yet a Rust struct description of what you
think it will look like :)

[...]
> I agree, but like I said, we need to figure out the unsafe interface
> that C already uses and build API upon it. I think focusing on the
> unsafe mechanism may be the way to start: you cannot implement something
> that cannot be implemented, and we don't have the magic pthread_key here
> ;-)

Sure we can do some experimentation, but I don't think we should put
unsafe abstractions upstream that we intend to replace with a safe
abstraction later. Otherwise people are going to depend on it and it's
going to be a mess. Do the experimenting out of tree and learn there.

---
Cheers,
Benno
On Tue, Jul 15, 2025 at 05:55:13PM +0200, Benno Lossin wrote:
[...]
> But this is intended to be used by drivers, right? If so, then we should
> do our usual due diligence and work out a safe abstraction. Only fall
> back to unsafe if it isn't possible.

Not necessarily only for drivers, we can also use it for implementing
other safe abstractions (e.g. hazard pointers, percpu counters etc).

All I'm saying is instead of figuring out a safe abstraction at first,
we should probably focus on identifying how to implement it and which
part is really unsafe and the safety requirement for that.

> I'm not familiar with percpu, but from the name I assumed that it's
> "just a variable for each cpu" so similar to `thread_local!`, but it's
> bound to the specific cpu instead of the thread.
>
> That in my mind should be rather easy to support in Rust at least with
> the thread_local-style API. You just need to ensure that no reference
> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
> to detect context switches.

Not really, in kernel, we have plenty of use cases where we read the
other CPU's percpu variables. For example, each CPU keeps its own
counter and we sum them up in another CPU.

If we would like to model it conceptually, it's more like an array
that's indexed by CpuId to me.

> Do you have a link? Or better yet a Rust struct description of what you
> think it will look like :)

Not Rust code yet, but here is the corresponding C code:

    https://github.com/Rust-for-Linux/linux/blob/rust-next/drivers/hv/vmbus_drv.c#L1396

But please note that we are not solely developing the abstraction for
this usage, but more for generally understanding how to wrap percpu
functionality similar to the usage in C.

> Sure we can do some experimentation, but I don't think we should put
> unsafe abstractions upstream that we intend to replace with a safe
> abstraction later. Otherwise people are going to depend on it and it's
> going to be a mess. Do the experimenting out of tree and learn there.

I doubt we can replace the unsafe abstraction with a safe one; if users
really care about performance then they would really need to use some
unsafe API to build their safe abstraction.

I disagree, Rust as a language on its own should be able to do what C
does, including being able to implement the percpu functionality same
as C; there is nothing wrong with a set of Rust primitives in the
kernel that provides fundamental percpu functionality the other core
facilities can rely on. The better part is that it will have all the
safety requirements documented well.

Regards,
Boqun
On Tue Jul 15, 2025 at 6:31 PM CEST, Boqun Feng wrote:
> On Tue, Jul 15, 2025 at 05:55:13PM +0200, Benno Lossin wrote:
[...]
> Not necessarily only for drivers, we can also use it for implementing
> other safe abstractions (e.g. hazard pointers, percpu counters etc).

That's fair, but then it should be `pub(crate)`.

> All I'm saying is instead of figuring out a safe abstraction at first,
> we should probably focus on identifying how to implement it and which
> part is really unsafe and the safety requirement for that.

Yeah. But then we should do that before merging :)

> Not really, in kernel, we have plenty of use cases where we read the
> other CPU's percpu variables. For example, each CPU keeps its own
> counter and we sum them up in another CPU.

But then you need some sort of synchronization?

> If we would like to model it conceptually, it's more like an array
> that's indexed by CpuId to me.

Gotcha, but this model is missing the access control/synchronization.
So I'm not so sure how useful it is.

(I think I asked this somewhere else, but the number of CPUs doesn't
change, right?)

> Not Rust code yet, but here is the corresponding C code:
>
>     https://github.com/Rust-for-Linux/linux/blob/rust-next/drivers/hv/vmbus_drv.c#L1396

Thanks!

> But please note that we are not solely developing the abstraction for
> this usage, but more for generally understanding how to wrap percpu
> functionality similar to the usage in C.

Well, I have to start somewhere for looking at the use-cases :)

If you have more, just let me see. (probably won't have enough time to
look at them now, but maybe in a couple weeks)

> I doubt we can replace the unsafe abstraction with a safe one; if users
> really care about performance then they would really need to use some
> unsafe API to build their safe abstraction.

That sounds pretty pessimistic, why do you think that?

> I disagree, Rust as a language on its own should be able to do what C
> does, including being able to implement the percpu functionality same
> as C; there is nothing wrong with a set of Rust primitives in the
> kernel that provides fundamental percpu functionality the other core
> facilities can rely on. The better part is that it will have all the
> safety requirements documented well.

Sure, but we haven't even tried to make it safe, so I don't think we
should add them now in this state.

---
Cheers,
Benno
On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
[...]
>> I doubt we can replace the unsafe abstraction with a safe one; if users
>> really care about performance then they would really need to use some
>> unsafe API to build their safe abstraction.
>
> That sounds pretty pessimistic, why do you think that?

I could ask you something similar: you barely know the implementation
and usage of percpu, why do you think it's possible? ;-)

Regards,
Boqun
On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
[...]
> That's fair, but then it should be `pub(crate)`.

Fine by me, but please see below.

> Yeah. But then we should do that before merging :)

Well, who's talking about merging? ;-) I thought we just began reviewing
here ;-)

> But then you need some sort of synchronization?

Right, but the synchronization can exist either in the percpu operations
themselves or outside the percpu operations. In some cases, the data
types are small enough to fit in atomic data types, and operations are
just load/store/cmpxchg etc, then operations on the current cpu and
remote reads will be naturally synchronized. Sometimes extra
synchronization is needed.

The keyword to find all these cases is `per_cpu_ptr()`:

    https://elixir.bootlin.com/linux/v6.15.6/A/ident/per_cpu_ptr

> Gotcha, but this model is missing the access control/synchronization.
> So I'm not so sure how useful it is.
>
> (I think I asked this somewhere else, but the number of CPUs doesn't
> change, right?)

In terms of percpu variables, yes. A percpu variable is even available
for an offline CPU.

> Well, I have to start somewhere for looking at the use-cases :)
>
> If you have more, just let me see. (probably won't have enough time to
> look at them now, but maybe in a couple weeks)

If you have time, feel free to take a look at hazard pointers:

    https://lore.kernel.org/lkml/20240917143402.930114-1-boqun.feng@gmail.com/
    https://lore.kernel.org/lkml/20250625031101.12555-1-boqun.feng@gmail.com/

You can also take a look at existing usage of percpu, e.g. SRCU uses it
to track how many readers are active:

    https://elixir.bootlin.com/linux/v6.15.6/source/kernel/rcu/srcutree.c#L577

[...]

Regards,
Boqun
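As a plain-Rust sketch of the counter pattern described above (a fixed
array stands in for the per-CPU slots; no kernel API is used, and the
real code would go through `this_cpu`/`per_cpu_ptr` accessors instead):
each "CPU" only updates its own slot, and a reader on any CPU can sum
every slot, the small atomic type making the remote loads safe without
any extra locking:

    use std::sync::atomic::{AtomicU64, Ordering};

    // Stand-in for per-CPU counter slots (four "CPUs").
    static COUNTERS: [AtomicU64; 4] = [
        AtomicU64::new(0),
        AtomicU64::new(0),
        AtomicU64::new(0),
        AtomicU64::new(0),
    ];

    // The "local" update: a CPU only touches its own slot.
    fn inc_local(cpu: usize) {
        COUNTERS[cpu].fetch_add(1, Ordering::Relaxed);
    }

    // The "remote" read: any CPU may walk all slots and sum them.
    fn sum_all() -> u64 {
        COUNTERS.iter().map(|c| c.load(Ordering::Relaxed)).sum()
    }

    fn main() {
        inc_local(0);
        inc_local(2);
        inc_local(2);
        println!("total = {}", sum_all()); // prints "total = 3"
    }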
On Tue Jul 15, 2025 at 11:34 PM CEST, Boqun Feng wrote:
> On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
[...]
>> Yeah. But then we should do that before merging :)
>
> Well, who's talking about merging? ;-) I thought we just began reviewing
> here ;-)

I understand [PATCH] emails as "I want to merge this" and [RFC PATCH] as
"I want to talk about merging this". It might be that I haven't seen the
RFC patch series, because I often mute those.

> Right, but the synchronization can exist either in the percpu operations
> themselves or outside the percpu operations. In some cases, the data
> types are small enough to fit in atomic data types, and operations are
> just load/store/cmpxchg etc, then operations on the current cpu and
> remote reads will be naturally synchronized. Sometimes extra
> synchronization is needed.

Sure, so we probably want direct atomics support. What about "extra
synchronization"? Is that using locks or RCU or what else?

> The keyword to find all these cases is `per_cpu_ptr()`:
>
>     https://elixir.bootlin.com/linux/v6.15.6/A/ident/per_cpu_ptr

Could you explain to me how to find them? I can either click on one of
the files with horrible C preprocessor macros or the auto-completion in
the search bar. But that one only shows 3 suggestions `_hyp_sym`,
`_nvhe_sym` and `_to_phys` which doesn't really mean much to me.

---
Cheers,
Benno
On Wed, Jul 16, 2025 at 12:32:04PM +0200, Benno Lossin wrote:
> On Tue Jul 15, 2025 at 11:34 PM CEST, Boqun Feng wrote:
> > On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
> > [...]
> >> >> >
> >> >> > First of all, `thread_local!` has to be implemented by some sys-specific
> >> >> > unsafe mechanism, right? For example on unix, I think it's using
> >> >> > pthread_key_t:
> >> >> >
> >> >> > https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
> >> >> >
> >> >> > what we are implementing (or wrapping) is the very basic unsafe
> >> >> > mechanism for percpu here. Surely we can explore the design for a safe
> >> >> > API, but the unsafe mechanism is probably necessary to look into at
> >> >> > first.
> >> >>
> >> >> But this is intended to be used by drivers, right? If so, then we should
> >> >
> >> > Not necessarily only for drivers, we can also use it for implementing
> >> > other safe abstraction (e.g. hazard pointers, percpu counters etc)
> >>
> >> That's fair, but then it should be `pub(crate)`.
> >>
> >
> > Fine by me, but please see below.
> >
> >> >> do our usual due diligence and work out a safe abstraction. Only fall
> >> >> back to unsafe if it isn't possible.
> >> >>
> >> >
> >> > All I'm saying is instead of figuring out a safe abstraction at first,
> >> > we should probably focus on identifying how to implement it and which
> >> > part is really unsafe and the safety requirement for that.
> >>
> >> Yeah. But then we should do that before merging :)
> >>
> >
> > Well, who's talknig about merging? ;-) I thought we just began reviewing
> > here ;-)
>
> I understand [PATCH] emails as "I want to merge this" and [RFC PATCH] as

But it doesn't mean "merge as it is", right? I don't think either I or
Mitchell implied that, I'm surprised that you had to mention that,
also based on "I often mute those" below, making it "[PATCH]" seems to
be a practical way to get more attention if one wants to get some
reviews.

> "I want to talk about merging this". It might be that I haven't seen the
> RFC patch series, because I often mute those.
>

Well, then you cannot blame people to move from "RFC PATCH" to "PATCH"
stage for more reviews, right? And you cannot make rules about what the
difference between [PATCH] and [RFC PATCH] if you ignore one of them ;-)

> >> >> I'm not familiar with percpu, but from the name I assumed that it's
> >> >> "just a variable for each cpu" so similar to `thread_local!`, but it's
> >> >> bound to the specific cpu instead of the thread.
> >> >>
> >> >> That in my mind should be rather easy to support in Rust at least with
> >> >> the thread_local-style API. You just need to ensure that no reference
> >> >> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
> >> >
> >> > Not really, in kernel, we have plenty of use cases that we read the
> >> > other CPU's percpu variables. For example, each CPU keeps it's own
> >> > counter and we sum them other in another CPU.
> >>
> >> But then you need some sort of synchronization?
> >>
> >
> > Right, but the synchronization can exist either in the percpu operations
> > themselves or outside the percpu operations. Some cases, the data types
> > are small enough to fit in atomic data types, and operations are just
> > load/store/cmpxchg etc, then operations on the current cpu and remote
> > read will be naturally synchronized. Sometimes extra synchronization is
> > needed.
>
> Sure, so we probably want direct atomics support. What about "extra
> synchronization"? Is that using locks or RCU or what else?
>

It's up to the users obviously, It could be some sort of locking or RCU,
it's case by case.

> > Keyword find all these cases are `per_cpu_ptr()`:
> >
> > https://elixir.bootlin.com/linux/v6.15.6/A/ident/per_cpu_ptr
>
> Could you explain to me how to find them? I can either click on one of
> the files with horrible C preprocessor macros or the auto-completion in
> the search bar. But that one only shows 3 suggestions `_hyp_sym`,
> `_nvhe_sym` and `_to_phys` which doesn't really mean much to me.
>

You need to find the usage of `per_cpu_ptr()`, which is a function that
gives you a pointer to a percpu variable on the other CPU, and then
that's usually the case where a "remote" read of percpu variable
happens.

Regards,
Boqun

> ---
> Cheers,
> Benno
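The "small enough to fit in atomic data types" case can be pictured with
plain `core` atomics; the fixed array below is only a stand-in for real
per-CPU storage and for the remote access that `per_cpu_ptr()` provides:

use core::sync::atomic::{AtomicU64, Ordering};

// Stand-in storage: one slot per CPU. In the kernel the storage would come
// from the per-CPU allocator and a remote slot from per_cpu_ptr(); a plain
// array is used here only to make the synchronization story visible.
const NR_CPUS: usize = 4; // illustrative, not the real configuration value
const ZERO: AtomicU64 = AtomicU64::new(0);
static COUNTERS: [AtomicU64; NR_CPUS] = [ZERO; NR_CPUS];

// The owning CPU bumps its own slot; no lock is needed and Relaxed suffices
// because nothing else is ordered against the counter value itself.
fn count_event(this_cpu: usize) {
    COUNTERS[this_cpu].fetch_add(1, Ordering::Relaxed);
}

// A "remote" reader (the per_cpu_ptr() pattern) walks every CPU's slot and
// sums them; the atomic loads make this race-free without extra locking,
// at the price of the total being only approximately up to date.
fn total() -> u64 {
    COUNTERS.iter().map(|c| c.load(Ordering::Relaxed)).sum()
}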
On Wed Jul 16, 2025 at 5:33 PM CEST, Boqun Feng wrote:
> On Wed, Jul 16, 2025 at 12:32:04PM +0200, Benno Lossin wrote:
>> On Tue Jul 15, 2025 at 11:34 PM CEST, Boqun Feng wrote:
>> > On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
>> > [...]
>> >> >> >
>> >> >> > First of all, `thread_local!` has to be implemented by some sys-specific
>> >> >> > unsafe mechanism, right? For example on unix, I think it's using
>> >> >> > pthread_key_t:
>> >> >> >
>> >> >> > https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
>> >> >> >
>> >> >> > what we are implementing (or wrapping) is the very basic unsafe
>> >> >> > mechanism for percpu here. Surely we can explore the design for a safe
>> >> >> > API, but the unsafe mechanism is probably necessary to look into at
>> >> >> > first.
>> >> >>
>> >> >> But this is intended to be used by drivers, right? If so, then we should
>> >> >
>> >> > Not necessarily only for drivers, we can also use it for implementing
>> >> > other safe abstraction (e.g. hazard pointers, percpu counters etc)
>> >>
>> >> That's fair, but then it should be `pub(crate)`.
>> >>
>> >
>> > Fine by me, but please see below.
>> >
>> >> >> do our usual due diligence and work out a safe abstraction. Only fall
>> >> >> back to unsafe if it isn't possible.
>> >> >>
>> >> >
>> >> > All I'm saying is instead of figuring out a safe abstraction at first,
>> >> > we should probably focus on identifying how to implement it and which
>> >> > part is really unsafe and the safety requirement for that.
>> >>
>> >> Yeah. But then we should do that before merging :)
>> >>
>> >
>> > Well, who's talknig about merging? ;-) I thought we just began reviewing
>> > here ;-)
>>
>> I understand [PATCH] emails as "I want to merge this" and [RFC PATCH] as
>
> But it doesn't mean "merge as it is", right? I don't think either I or
> Mitchell implied that, I'm surprised that you had to mention that,

Yeah that is true, but it at least shows the intention :)

> also based on "I often mute those" below, making it "[PATCH]" seems to
> be a practical way to get more attention if one wants to get some
> reviews.

That is true, I do usually read the titles of RFC patches though and
sometimes take a look eg your atomics series.

>> "I want to talk about merging this". It might be that I haven't seen the
>> RFC patch series, because I often mute those.
>>
>
> Well, then you cannot blame people to move from "RFC PATCH" to "PATCH"
> stage for more reviews, right? And you cannot make rules about what the
> difference between [PATCH] and [RFC PATCH] if you ignore one of them ;-)

I'm not trying to blame anyone. I saw a lot of unsafe in the example and
thought "we can do better" and since I haven't heard any sufficient
arguments showing that it's impossible to improve, we should do some
design work.

>> >> >> I'm not familiar with percpu, but from the name I assumed that it's
>> >> >> "just a variable for each cpu" so similar to `thread_local!`, but it's
>> >> >> bound to the specific cpu instead of the thread.
>> >> >>
>> >> >> That in my mind should be rather easy to support in Rust at least with
>> >> >> the thread_local-style API. You just need to ensure that no reference
>> >> >> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
>> >> >
>> >> > Not really, in kernel, we have plenty of use cases that we read the
>> >> > other CPU's percpu variables. For example, each CPU keeps it's own
>> >> > counter and we sum them other in another CPU.
>> >>
>> >> But then you need some sort of synchronization?
>> >>
>> >
>> > Right, but the synchronization can exist either in the percpu operations
>> > themselves or outside the percpu operations. Some cases, the data types
>> > are small enough to fit in atomic data types, and operations are just
>> > load/store/cmpxchg etc, then operations on the current cpu and remote
>> > read will be naturally synchronized. Sometimes extra synchronization is
>> > needed.
>>
>> Sure, so we probably want direct atomics support. What about "extra
>> synchronization"? Is that using locks or RCU or what else?
>>
>
> It's up to the users obviously, It could be some sort of locking or RCU,
> it's case by case.

Makes sense, what do you need in the VMBus driver?

>> > Keyword find all these cases are `per_cpu_ptr()`:
>> >
>> > https://elixir.bootlin.com/linux/v6.15.6/A/ident/per_cpu_ptr
>>
>> Could you explain to me how to find them? I can either click on one of
>> the files with horrible C preprocessor macros or the auto-completion in
>> the search bar. But that one only shows 3 suggestions `_hyp_sym`,
>> `_nvhe_sym` and `_to_phys` which doesn't really mean much to me.
>>
>
> You need to find the usage of `per_cpu_ptr()`, which is a function that
> gives you a pointer to a percpu variable on the other CPU, and then
> that's usually the case where a "remote" read of percpu variable
> happens.

Ahh gotcha, I thought you pointed me to some definitions of operations
on percpu pointers.

---
Cheers,
Benno
On Wed, Jul 16, 2025 at 07:21:32PM +0200, Benno Lossin wrote:
> On Wed Jul 16, 2025 at 5:33 PM CEST, Boqun Feng wrote:
> > On Wed, Jul 16, 2025 at 12:32:04PM +0200, Benno Lossin wrote:
> >> On Tue Jul 15, 2025 at 11:34 PM CEST, Boqun Feng wrote:
> >> > On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
> >> > [...]
> >> >> >> >
> >> >> >> > First of all, `thread_local!` has to be implemented by some sys-specific
> >> >> >> > unsafe mechanism, right? For example on unix, I think it's using
> >> >> >> > pthread_key_t:
> >> >> >> >
> >> >> >> > https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
> >> >> >> >
> >> >> >> > what we are implementing (or wrapping) is the very basic unsafe
> >> >> >> > mechanism for percpu here. Surely we can explore the design for a safe
> >> >> >> > API, but the unsafe mechanism is probably necessary to look into at
> >> >> >> > first.
> >> >> >>
> >> >> >> But this is intended to be used by drivers, right? If so, then we should
> >> >> >
> >> >> > Not necessarily only for drivers, we can also use it for implementing
> >> >> > other safe abstraction (e.g. hazard pointers, percpu counters etc)
> >> >>
> >> >> That's fair, but then it should be `pub(crate)`.
> >> >>
> >> >
> >> > Fine by me, but please see below.
> >> >
> >> >> >> do our usual due diligence and work out a safe abstraction. Only fall
> >> >> >> back to unsafe if it isn't possible.
> >> >> >>
> >> >> >
> >> >> > All I'm saying is instead of figuring out a safe abstraction at first,
> >> >> > we should probably focus on identifying how to implement it and which
> >> >> > part is really unsafe and the safety requirement for that.
> >> >>
> >> >> Yeah. But then we should do that before merging :)
> >> >>
> >> >
> >> > Well, who's talknig about merging? ;-) I thought we just began reviewing
> >> > here ;-)
> >>
> >> I understand [PATCH] emails as "I want to merge this" and [RFC PATCH] as
> >
> > But it doesn't mean "merge as it is", right? I don't think either I or
> > Mitchell implied that, I'm surprised that you had to mention that,
>
> Yeah that is true, but it at least shows the intention :)
>
> > also based on "I often mute those" below, making it "[PATCH]" seems to
> > be a practical way to get more attention if one wants to get some
> > reviews.
>
> That is true, I do usually read the titles of RFC patches though and
> sometimes take a look eg your atomics series.
>
> >> "I want to talk about merging this". It might be that I haven't seen the
> >> RFC patch series, because I often mute those.
> >>
> >
> > Well, then you cannot blame people to move from "RFC PATCH" to "PATCH"
> > stage for more reviews, right? And you cannot make rules about what the
> > difference between [PATCH] and [RFC PATCH] if you ignore one of them ;-)
>
> I'm not trying to blame anyone. I saw a lot of unsafe in the example and
> thought "we can do better" and since I haven't heard any sufficient
> arguments showing that it's impossible to improve, we should do some
> design work.
>

I agree with you, and I like what you're proposing, but I think design
work can be done at "PATCH" stage, right? And sometimes, it's also OK to
do some design work even at some version like "v12" ;-)

Also I want to see more forward-progress actions about the design work
improvement. For example, we can examine every case that makes
unsafe_get_per_cpu!() unsafe, and see if we can improve that by typing
or something else. We always can "do better", but the important part is
how to get there ;-)

> >> >> >> I'm not familiar with percpu, but from the name I assumed that it's
> >> >> >> "just a variable for each cpu" so similar to `thread_local!`, but it's
> >> >> >> bound to the specific cpu instead of the thread.
> >> >> >>
> >> >> >> That in my mind should be rather easy to support in Rust at least with
> >> >> >> the thread_local-style API. You just need to ensure that no reference
> >> >> >> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
> >> >> >
> >> >> > Not really, in kernel, we have plenty of use cases that we read the
> >> >> > other CPU's percpu variables. For example, each CPU keeps it's own
> >> >> > counter and we sum them other in another CPU.
> >> >>
> >> >> But then you need some sort of synchronization?
> >> >>
> >> >
> >> > Right, but the synchronization can exist either in the percpu operations
> >> > themselves or outside the percpu operations. Some cases, the data types
> >> > are small enough to fit in atomic data types, and operations are just
> >> > load/store/cmpxchg etc, then operations on the current cpu and remote
> >> > read will be naturally synchronized. Sometimes extra synchronization is
> >> > needed.
> >>
> >> Sure, so we probably want direct atomics support. What about "extra
> >> synchronization"? Is that using locks or RCU or what else?
> >>
> >
> > It's up to the users obviously, It could be some sort of locking or RCU,
> > it's case by case.
>
> Makes sense, what do you need in the VMBus driver?
>

In the VMBus driver, it's actually isolated, i.e. each CPU only accesses
its own work_struct, so synchronization between CPUs is not needed.

Regards,
Boqun

> >> > Keyword find all these cases are `per_cpu_ptr()`:
> >> >
> >> > https://elixir.bootlin.com/linux/v6.15.6/A/ident/per_cpu_ptr
> >>
> >> Could you explain to me how to find them? I can either click on one of
> >> the files with horrible C preprocessor macros or the auto-completion in
> >> the search bar. But that one only shows 3 suggestions `_hyp_sym`,
> >> `_nvhe_sym` and `_to_phys` which doesn't really mean much to me.
> >>
> >
> > You need to find the usage of `per_cpu_ptr()`, which is a function that
> > gives you a pointer to a percpu variable on the other CPU, and then
> > that's usually the case where a "remote" read of percpu variable
> > happens.
>
> Ahh gotcha, I thought you pointed me to some definitions of operations
> on percpu pointers.
>
> ---
> Cheers,
> Benno
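One possible shape for the "improve that by typing" idea (and the "tokens
that are statically known to be unique" mentioned earlier in the thread) is
sketched below; `Counter` and `UniqueToken` are invented names, not a
proposal from the series:

use core::marker::PhantomData;
use core::sync::atomic::{AtomicBool, Ordering};

// Marker naming one particular per-CPU variable (invented for illustration).
pub struct Counter;

// A token that can be materialized at most once for `Counter`, i.e. one
// that is "statically known to be unique". Holding it could stand in for
// the "no other accessor exists for this variable" part of the current
// safety requirement, moving that obligation out of every unsafe block.
pub struct UniqueToken<S>(PhantomData<S>);

static COUNTER_TOKEN_TAKEN: AtomicBool = AtomicBool::new(false);

impl UniqueToken<Counter> {
    // Only the first caller gets a token; later callers get None instead of
    // a second (aliasing) handle.
    pub fn take() -> Option<Self> {
        if COUNTER_TOKEN_TAKEN.swap(true, Ordering::AcqRel) {
            None
        } else {
            Some(UniqueToken(PhantomData))
        }
    }
}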
On Wed Jul 16, 2025 at 7:52 PM CEST, Boqun Feng wrote:
> On Wed, Jul 16, 2025 at 07:21:32PM +0200, Benno Lossin wrote:
>> On Wed Jul 16, 2025 at 5:33 PM CEST, Boqun Feng wrote:
>> > On Wed, Jul 16, 2025 at 12:32:04PM +0200, Benno Lossin wrote:
>> >> On Tue Jul 15, 2025 at 11:34 PM CEST, Boqun Feng wrote:
>> >> > On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
>> >> > [...]
>> >> >> >> >
>> >> >> >> > First of all, `thread_local!` has to be implemented by some sys-specific
>> >> >> >> > unsafe mechanism, right? For example on unix, I think it's using
>> >> >> >> > pthread_key_t:
>> >> >> >> >
>> >> >> >> > https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
>> >> >> >> >
>> >> >> >> > what we are implementing (or wrapping) is the very basic unsafe
>> >> >> >> > mechanism for percpu here. Surely we can explore the design for a safe
>> >> >> >> > API, but the unsafe mechanism is probably necessary to look into at
>> >> >> >> > first.
>> >> >> >>
>> >> >> >> But this is intended to be used by drivers, right? If so, then we should
>> >> >> >
>> >> >> > Not necessarily only for drivers, we can also use it for implementing
>> >> >> > other safe abstraction (e.g. hazard pointers, percpu counters etc)
>> >> >>
>> >> >> That's fair, but then it should be `pub(crate)`.
>> >> >>
>> >> >
>> >> > Fine by me, but please see below.
>> >> >
>> >> >> >> do our usual due diligence and work out a safe abstraction. Only fall
>> >> >> >> back to unsafe if it isn't possible.
>> >> >> >>
>> >> >> >
>> >> >> > All I'm saying is instead of figuring out a safe abstraction at first,
>> >> >> > we should probably focus on identifying how to implement it and which
>> >> >> > part is really unsafe and the safety requirement for that.
>> >> >>
>> >> >> Yeah. But then we should do that before merging :)
>> >> >>
>> >> >
>> >> > Well, who's talknig about merging? ;-) I thought we just began reviewing
>> >> > here ;-)
>> >>
>> >> I understand [PATCH] emails as "I want to merge this" and [RFC PATCH] as
>> >
>> > But it doesn't mean "merge as it is", right? I don't think either I or
>> > Mitchell implied that, I'm surprised that you had to mention that,
>>
>> Yeah that is true, but it at least shows the intention :)
>>
>> > also based on "I often mute those" below, making it "[PATCH]" seems to
>> > be a practical way to get more attention if one wants to get some
>> > reviews.
>>
>> That is true, I do usually read the titles of RFC patches though and
>> sometimes take a look eg your atomics series.
>>
>> >> "I want to talk about merging this". It might be that I haven't seen the
>> >> RFC patch series, because I often mute those.
>> >>
>> >
>> > Well, then you cannot blame people to move from "RFC PATCH" to "PATCH"
>> > stage for more reviews, right? And you cannot make rules about what the
>> > difference between [PATCH] and [RFC PATCH] if you ignore one of them ;-)
>>
>> I'm not trying to blame anyone. I saw a lot of unsafe in the example and
>> thought "we can do better" and since I haven't heard any sufficient
>> arguments showing that it's impossible to improve, we should do some
>> design work.
>>
>
> I agree with you, and I like what you're proposing, but I think design
> work can be done at "PATCH" stage, right? And sometimes, it's also OK to
> do some design work even at some version like "v12" ;-)

Yeah of course. The thing is just that nobody asked why there was unsafe
and thus I got the impression that people thought this would be a good
abstraction for percpu. (don't take from this that it's bad :)

> Also I want to see more forward-progress actions about the design work
> improvement. For example, we can examine every case that makes
> unsafe_get_per_cpu!() unsafe, and see if we can improve that by typing
> or something else. We always can "do better", but the important part is
> how to get there ;-)

Yeah that would be a starting point :)

>> >> >> >> I'm not familiar with percpu, but from the name I assumed that it's
>> >> >> >> "just a variable for each cpu" so similar to `thread_local!`, but it's
>> >> >> >> bound to the specific cpu instead of the thread.
>> >> >> >>
>> >> >> >> That in my mind should be rather easy to support in Rust at least with
>> >> >> >> the thread_local-style API. You just need to ensure that no reference
>> >> >> >> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
>> >> >> >
>> >> >> > Not really, in kernel, we have plenty of use cases that we read the
>> >> >> > other CPU's percpu variables. For example, each CPU keeps it's own
>> >> >> > counter and we sum them other in another CPU.
>> >> >>
>> >> >> But then you need some sort of synchronization?
>> >> >>
>> >> >
>> >> > Right, but the synchronization can exist either in the percpu operations
>> >> > themselves or outside the percpu operations. Some cases, the data types
>> >> > are small enough to fit in atomic data types, and operations are just
>> >> > load/store/cmpxchg etc, then operations on the current cpu and remote
>> >> > read will be naturally synchronized. Sometimes extra synchronization is
>> >> > needed.
>> >>
>> >> Sure, so we probably want direct atomics support. What about "extra
>> >> synchronization"? Is that using locks or RCU or what else?
>> >>
>> >
>> > It's up to the users obviously, It could be some sort of locking or RCU,
>> > it's case by case.
>>
>> Makes sense, what do you need in the VMBus driver?
>>
>
> In the VMBus driver, it's actually isolated, i.e. each CPU only accesses
> its own work_struct, so synchronization between CPUs is not needed.

I see, so we could either just start out with no sync support or --
which I would prefer -- get a list of the most common use-cases and
implement those too (or at least design the first part compatibly with
further extensions).

---
Cheers,
Benno
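The "no sync support" starting point, i.e. the isolated VMBus-style pattern
where each CPU only ever touches its own slot, could look roughly like the
sketch below; `PendingWork` is a made-up stand-in, not the real work_struct
wrapper:

use core::cell::Cell;

// Made-up stand-in for a per-CPU work item in the isolated pattern: only
// the owning CPU ever touches its slot, so no atomics and no locking are
// needed; a plain (non-Sync) Cell is enough as long as the access happens
// with the CPU pinned (preemption disabled) and the handle never moves to
// another CPU.
struct PendingWork {
    queued: Cell<bool>,
}

impl PendingWork {
    // Called only by the owning CPU; returns whether work was already queued.
    fn mark_queued(&self) -> bool {
        let was_queued = self.queued.get();
        self.queued.set(true);
        was_queued
    }
}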