[v2] rust: Add Per-CPU Variable API

[PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Mitchell Levy 2 months, 3 weeks ago

Add a short exercise for Rust's per-CPU variable API, modelled after
lib/percpu_test.c

Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
---
 lib/Kconfig.debug       |   9 ++++
 lib/Makefile            |   1 +
 lib/percpu_test_rust.rs | 120 ++++++++++++++++++++++++++++++++++++++++++++++++
 rust/helpers/percpu.c   |  11 +++++
 4 files changed, 141 insertions(+)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ebe33181b6e6..959ce156c601 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2418,6 +2418,15 @@ config PERCPU_TEST
 
 	  If unsure, say N.
 
+config PERCPU_TEST_RUST
+	tristate "Rust per cpu operations test"
+	depends on m && DEBUG_KERNEL && RUST
+	help
+	  Enable this option to build a test module which validates Rust per-cpu
+	  operations.
+
+	  If unsure, say N.
+
 config ATOMIC64_SELFTEST
 	tristate "Perform an atomic64_t self-test"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index c38582f187dd..ab19106cc22c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -288,6 +288,7 @@ obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o
 obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o
 
 obj-$(CONFIG_PERCPU_TEST) += percpu_test.o
+obj-$(CONFIG_PERCPU_TEST_RUST) += percpu_test_rust.o
 
 obj-$(CONFIG_ASN1) += asn1_decoder.o
 obj-$(CONFIG_ASN1_ENCODER) += asn1_encoder.o
diff --git a/lib/percpu_test_rust.rs b/lib/percpu_test_rust.rs
new file mode 100644
index 000000000000..a9652e6ece08
--- /dev/null
+++ b/lib/percpu_test_rust.rs
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+//! A simple self test for the rust per-CPU API.
+
+use core::ffi::c_void;
+
+use kernel::{
+    bindings::{on_each_cpu, smp_processor_id},
+    define_per_cpu,
+    percpu::{cpu_guard::*, *},
+    pr_info,
+    prelude::*,
+    unsafe_get_per_cpu,
+};
+
+module! {
+    type: PerCpuTestModule,
+    name: "percpu_test_rust",
+    author: "Mitchell Levy",
+    description: "Test code to exercise the Rust Per CPU variable API",
+    license: "GPL v2",
+}
+
+struct PerCpuTestModule;
+
+define_per_cpu!(PERCPU: i64 = 0);
+define_per_cpu!(UPERCPU: u64 = 0);
+
+impl kernel::Module for PerCpuTestModule {
+    fn init(_module: &'static ThisModule) -> Result<Self, Error> {
+        pr_info!("rust percpu test start\n");
+
+        let mut native: i64 = 0;
+        // SAFETY: PERCPU is properly defined
+        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
+        // SAFETY: We only have one PerCpu that points at PERCPU
+        unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut i64| {
+            pr_info!("The contents of pcpu are {}\n", val);
+
+            native += -1;
+            *val += -1;
+            pr_info!("Native: {}, *pcpu: {}\n", native, val);
+            assert!(native == *val && native == -1);
+
+            native += 1;
+            *val += 1;
+            pr_info!("Native: {}, *pcpu: {}\n", native, val);
+            assert!(native == *val && native == 0);
+        });
+
+        let mut unative: u64 = 0;
+        // SAFETY: UPERCPU is properly defined
+        let mut upcpu: StaticPerCpu<u64> = unsafe { unsafe_get_per_cpu!(UPERCPU) };
+
+        // SAFETY: We only have one PerCpu pointing at UPERCPU
+        unsafe { upcpu.get(CpuGuard::new()) }.with(|val: &mut u64| {
+            unative += 1;
+            *val += 1;
+            pr_info!("Unative: {}, *upcpu: {}\n", unative, val);
+            assert!(unative == *val && unative == 1);
+
+            unative = unative.wrapping_add((-1i64) as u64);
+            *val = val.wrapping_add((-1i64) as u64);
+            pr_info!("Unative: {}, *upcpu: {}\n", unative, val);
+            assert!(unative == *val && unative == 0);
+
+            unative = unative.wrapping_add((-1i64) as u64);
+            *val = val.wrapping_add((-1i64) as u64);
+            pr_info!("Unative: {}, *upcpu: {}\n", unative, val);
+            assert!(unative == *val && unative == (-1i64) as u64);
+
+            unative = 0;
+            *val = 0;
+
+            unative = unative.wrapping_sub(1);
+            *val = val.wrapping_sub(1);
+            pr_info!("Unative: {}, *upcpu: {}\n", unative, val);
+            assert!(unative == *val && unative == (-1i64) as u64);
+            assert!(unative == *val && unative == u64::MAX);
+        });
+
+        pr_info!("rust static percpu test done\n");
+
+        pr_info!("rust dynamic percpu test start\n");
+        let mut test: DynamicPerCpu<u64> = DynamicPerCpu::new(GFP_KERNEL).unwrap();
+
+        // SAFETY: No prerequisites for on_each_cpu.
+        unsafe {
+            on_each_cpu(Some(inc_percpu), (&raw mut test) as *mut c_void, 0);
+            on_each_cpu(Some(inc_percpu), (&raw mut test) as *mut c_void, 0);
+            on_each_cpu(Some(inc_percpu), (&raw mut test) as *mut c_void, 0);
+            on_each_cpu(Some(inc_percpu), (&raw mut test) as *mut c_void, 1);
+            on_each_cpu(Some(check_percpu), (&raw mut test) as *mut c_void, 1);
+        }
+
+        pr_info!("rust dynamic percpu test done\n");
+
+        // Return Err to unload the module
+        Result::Err(EINVAL)
+    }
+}
+
+extern "C" fn inc_percpu(info: *mut c_void) {
+    // SAFETY: We know that info is a void *const DynamicPerCpu<u64> and DynamicPerCpu<u64> is Send.
+    let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<u64>)).clone() };
+    // SAFETY: smp_processor_id has no preconditions
+    pr_info!("Incrementing on {}\n", unsafe { smp_processor_id() });
+
+    // SAFETY: We don't have multiple clones of pcpu in scope
+    unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut u64| *val += 1);
+}
+
+extern "C" fn check_percpu(info: *mut c_void) {
+    // SAFETY: We know that info is a void *const DynamicPerCpu<u64> and DynamicPerCpu<u64> is Send.
+    let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<u64>)).clone() };
+    // SAFETY: smp_processor_id has no preconditions
+    pr_info!("Asserting on {}\n", unsafe { smp_processor_id() });
+
+    // SAFETY: We don't have multiple clones of pcpu in scope
+    unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut u64| assert!(*val == 4));
+}
diff --git a/rust/helpers/percpu.c b/rust/helpers/percpu.c
index a091389f730f..0e9b2fed3ebd 100644
--- a/rust/helpers/percpu.c
+++ b/rust/helpers/percpu.c
@@ -1,9 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/percpu.h>
+#include <linux/smp.h>
 
 void __percpu *rust_helper_alloc_percpu(size_t sz, size_t align)
 {
 	return __alloc_percpu(sz, align);
 }
 
+void rust_helper_on_each_cpu(smp_call_func_t func, void *info, int wait)
+{
+	on_each_cpu(func, info, wait);
+}
+
+int rust_helper_smp_processor_id(void)
+{
+	return smp_processor_id();
+}
+

-- 
2.34.1

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Benno Lossin 2 months, 3 weeks ago

On Sat Jul 12, 2025 at 11:31 PM CEST, Mitchell Levy wrote:
> Add a short exercise for Rust's per-CPU variable API, modelled after
> lib/percpu_test.c
>
> Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
> ---
>  lib/Kconfig.debug       |   9 ++++
>  lib/Makefile            |   1 +
>  lib/percpu_test_rust.rs | 120 ++++++++++++++++++++++++++++++++++++++++++++++++

I don't know if this is the correct place, the code looks much more like
a sample, so why not place it there instead?

>  rust/helpers/percpu.c   |  11 +++++
>  4 files changed, 141 insertions(+)
> diff --git a/lib/percpu_test_rust.rs b/lib/percpu_test_rust.rs
> new file mode 100644
> index 000000000000..a9652e6ece08
> --- /dev/null
> +++ b/lib/percpu_test_rust.rs
> @@ -0,0 +1,120 @@
> +// SPDX-License-Identifier: GPL-2.0
> +//! A simple self test for the rust per-CPU API.
> +
> +use core::ffi::c_void;
> +
> +use kernel::{
> +    bindings::{on_each_cpu, smp_processor_id},
> +    define_per_cpu,
> +    percpu::{cpu_guard::*, *},
> +    pr_info,
> +    prelude::*,
> +    unsafe_get_per_cpu,
> +};
> +
> +module! {
> +    type: PerCpuTestModule,
> +    name: "percpu_test_rust",
> +    author: "Mitchell Levy",
> +    description: "Test code to exercise the Rust Per CPU variable API",
> +    license: "GPL v2",
> +}
> +
> +struct PerCpuTestModule;
> +
> +define_per_cpu!(PERCPU: i64 = 0);
> +define_per_cpu!(UPERCPU: u64 = 0);
> +
> +impl kernel::Module for PerCpuTestModule {
> +    fn init(_module: &'static ThisModule) -> Result<Self, Error> {
> +        pr_info!("rust percpu test start\n");
> +
> +        let mut native: i64 = 0;
> +        // SAFETY: PERCPU is properly defined
> +        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };

I don't understand why we need unsafe here, can't we just create
something specially in the `define_per_cpu` macro that is then confirmed
by the `get_per_cpu!` macro and thus it can be safe?

> +        // SAFETY: We only have one PerCpu that points at PERCPU
> +        unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut i64| {

Hmm I also don't like the unsafe part here...

Can't we use the same API that `thread_local!` in the standard library
has:

    https://doc.rust-lang.org/std/macro.thread_local.html

So in this example you would store a `Cell<i64>` instead.

I'm not familiar with per CPU variables, but if you're usually storing
`Copy` types, then this is much better wrt not having unsafe code
everywhere.

If one also often stores `!Copy` types, then we might be able to get
away with `RefCell`, but that's a small runtime overhead -- which is
probably bad given that per cpu variables are most likely used for
performance reasons? In that case the user might just need to store
`UnsafeCell` and use unsafe regardless. (or we invent something
specifically for that case, eg tokens that are statically known to be
unique etc)

---
Cheers,
Benno

> +            pr_info!("The contents of pcpu are {}\n", val);
> +
> +            native += -1;
> +            *val += -1;
> +            pr_info!("Native: {}, *pcpu: {}\n", native, val);
> +            assert!(native == *val && native == -1);
> +
> +            native += 1;
> +            *val += 1;
> +            pr_info!("Native: {}, *pcpu: {}\n", native, val);
> +            assert!(native == *val && native == 0);
> +        });

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Mitchell Levy 2 months, 3 weeks ago

On Sun, Jul 13, 2025 at 11:30:31AM +0200, Benno Lossin wrote:
> On Sat Jul 12, 2025 at 11:31 PM CEST, Mitchell Levy wrote:
> > Add a short exercise for Rust's per-CPU variable API, modelled after
> > lib/percpu_test.c
> >
> > Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
> > ---
> >  lib/Kconfig.debug       |   9 ++++
> >  lib/Makefile            |   1 +
> >  lib/percpu_test_rust.rs | 120 ++++++++++++++++++++++++++++++++++++++++++++++++
> 
> I don't know if this is the correct place, the code looks much more like
> a sample, so why not place it there instead?

I don't feel particularly strongly either way --- I defaulted to `lib/`
since that's where the `percpu_test.c` I was working off of is located.
Happy to change for v3

> >  rust/helpers/percpu.c   |  11 +++++
> >  4 files changed, 141 insertions(+)
> > diff --git a/lib/percpu_test_rust.rs b/lib/percpu_test_rust.rs
> > new file mode 100644
> > index 000000000000..a9652e6ece08
> > --- /dev/null
> > +++ b/lib/percpu_test_rust.rs
> > @@ -0,0 +1,120 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +//! A simple self test for the rust per-CPU API.
> > +
> > +use core::ffi::c_void;
> > +
> > +use kernel::{
> > +    bindings::{on_each_cpu, smp_processor_id},
> > +    define_per_cpu,
> > +    percpu::{cpu_guard::*, *},
> > +    pr_info,
> > +    prelude::*,
> > +    unsafe_get_per_cpu,
> > +};
> > +
> > +module! {
> > +    type: PerCpuTestModule,
> > +    name: "percpu_test_rust",
> > +    author: "Mitchell Levy",
> > +    description: "Test code to exercise the Rust Per CPU variable API",
> > +    license: "GPL v2",
> > +}
> > +
> > +struct PerCpuTestModule;
> > +
> > +define_per_cpu!(PERCPU: i64 = 0);
> > +define_per_cpu!(UPERCPU: u64 = 0);
> > +
> > +impl kernel::Module for PerCpuTestModule {
> > +    fn init(_module: &'static ThisModule) -> Result<Self, Error> {
> > +        pr_info!("rust percpu test start\n");
> > +
> > +        let mut native: i64 = 0;
> > +        // SAFETY: PERCPU is properly defined
> > +        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
> 
> I don't understand why we need unsafe here, can't we just create
> something specially in the `define_per_cpu` macro that is then confirmed
> by the `get_per_cpu!` macro and thus it can be safe?

As is, something like
    define_per_cpu!(PERCPU: i32 = 0);

    fn func() {
        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
    }
will compile, but any usage of `pcpu` will be UB. This is because
`unsafe_get_per_cpu!` is just blindly casting pointers and, as far as I
know, the compiler does not do any checking of pointer casts. If you
have thoughts/ideas on how to get around this problem, I'd certainly
*like* to provide a safe API here :)

> > +        // SAFETY: We only have one PerCpu that points at PERCPU
> > +        unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut i64| {
> 
> Hmm I also don't like the unsafe part here...
> 
> Can't we use the same API that `thread_local!` in the standard library
> has:
> 
>     https://doc.rust-lang.org/std/macro.thread_local.html
> 
> So in this example you would store a `Cell<i64>` instead.
> 
> I'm not familiar with per CPU variables, but if you're usually storing
> `Copy` types, then this is much better wrt not having unsafe code
> everywhere.
> 
> If one also often stores `!Copy` types, then we might be able to get
> away with `RefCell`, but that's a small runtime overhead -- which is
> probably bad given that per cpu variables are most likely used for
> performance reasons? In that case the user might just need to store
> `UnsafeCell` and use unsafe regardless. (or we invent something
> specifically for that case, eg tokens that are statically known to be
> unique etc)

I'm open to including a specialization for `T: Copy` in a similar vein
to what I have here for numeric types. Off the top of my head, that
shouldn't require any user-facing `unsafe`. But yes, I believe there is
a significant amount of interest in having `!Copy` per-CPU variables.
(At least, I'm interested in having them around for experimenting with
using Rust for HV drivers.)

I would definitely like to avoid *requiring* the use of `RefCell` since,
as you mention, it does have a runtime overhead. Per-CPU variables can
be used for "logical" reasons rather than just as a performance
optimization, so there might be some cases where paying the runtime
overhead is ok. But that's certainly not true in all cases. That said,
perhaps there could be a safely obtainable token type that only passes a
`&T` (rather than a `&mut T`) to its closure, and then if a user doesn't
mind the runtime overhead, they can choose `T` to be a `RefCell`.
Thoughts?

For `UnsafeCell`, if a user of the API were to have something like a
`PerCpu<UnsafeCell<T>>` that safely spits out a `&UnsafeCell<T>`, my
understanding is that mutating the underlying `T` would require the
exact same safety guarantees as what's here, except now it'd need a much
bigger unsafe block and would have to do all of its manipulations via
pointers. That seems like a pretty big ergonomics burden without a clear
(to me) benefit.

> ---
> Cheers,
> Benno
> 
> > +            pr_info!("The contents of pcpu are {}\n", val);
> > +
> > +            native += -1;
> > +            *val += -1;
> > +            pr_info!("Native: {}, *pcpu: {}\n", native, val);
> > +            assert!(native == *val && native == -1);
> > +
> > +            native += 1;
> > +            *val += 1;
> > +            pr_info!("Native: {}, *pcpu: {}\n", native, val);
> > +            assert!(native == *val && native == 0);
> > +        });

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Benno Lossin 2 months, 3 weeks ago

On Tue Jul 15, 2025 at 12:31 PM CEST, Mitchell Levy wrote:
> On Sun, Jul 13, 2025 at 11:30:31AM +0200, Benno Lossin wrote:
>> On Sat Jul 12, 2025 at 11:31 PM CEST, Mitchell Levy wrote:
>> > Add a short exercise for Rust's per-CPU variable API, modelled after
>> > lib/percpu_test.c
>> >
>> > Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
>> > ---
>> >  lib/Kconfig.debug       |   9 ++++
>> >  lib/Makefile            |   1 +
>> >  lib/percpu_test_rust.rs | 120 ++++++++++++++++++++++++++++++++++++++++++++++++
>> 
>> I don't know if this is the correct place, the code looks much more like
>> a sample, so why not place it there instead?
>
> I don't feel particularly strongly either way --- I defaulted to `lib/`
> since that's where the `percpu_test.c` I was working off of is located.
> Happy to change for v3

Since we don't have Rust stuff in lib/ yet (and that the code looks much
more like the samples we already have) I think putting it in
samples/rust is better.

>> >  rust/helpers/percpu.c   |  11 +++++
>> >  4 files changed, 141 insertions(+)
>> > diff --git a/lib/percpu_test_rust.rs b/lib/percpu_test_rust.rs
>> > new file mode 100644
>> > index 000000000000..a9652e6ece08
>> > --- /dev/null
>> > +++ b/lib/percpu_test_rust.rs
>> > @@ -0,0 +1,120 @@
>> > +// SPDX-License-Identifier: GPL-2.0
>> > +//! A simple self test for the rust per-CPU API.
>> > +
>> > +use core::ffi::c_void;
>> > +
>> > +use kernel::{
>> > +    bindings::{on_each_cpu, smp_processor_id},
>> > +    define_per_cpu,
>> > +    percpu::{cpu_guard::*, *},
>> > +    pr_info,
>> > +    prelude::*,
>> > +    unsafe_get_per_cpu,
>> > +};
>> > +
>> > +module! {
>> > +    type: PerCpuTestModule,
>> > +    name: "percpu_test_rust",
>> > +    author: "Mitchell Levy",
>> > +    description: "Test code to exercise the Rust Per CPU variable API",
>> > +    license: "GPL v2",
>> > +}
>> > +
>> > +struct PerCpuTestModule;
>> > +
>> > +define_per_cpu!(PERCPU: i64 = 0);
>> > +define_per_cpu!(UPERCPU: u64 = 0);
>> > +
>> > +impl kernel::Module for PerCpuTestModule {
>> > +    fn init(_module: &'static ThisModule) -> Result<Self, Error> {
>> > +        pr_info!("rust percpu test start\n");
>> > +
>> > +        let mut native: i64 = 0;
>> > +        // SAFETY: PERCPU is properly defined
>> > +        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
>> 
>> I don't understand why we need unsafe here, can't we just create
>> something specially in the `define_per_cpu` macro that is then confirmed
>> by the `get_per_cpu!` macro and thus it can be safe?
>
> As is, something like
>     define_per_cpu!(PERCPU: i32 = 0);
>
>     fn func() {
>         let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
>     }
> will compile, but any usage of `pcpu` will be UB. This is because
> `unsafe_get_per_cpu!` is just blindly casting pointers and, as far as I
> know, the compiler does not do any checking of pointer casts. If you
> have thoughts/ideas on how to get around this problem, I'd certainly
> *like* to provide a safe API here :)

I haven't taken a look at your implementation, but you do have the type
declared in `define_per_cpu!`, so it's a bit of a mystery to me why you
can't get that out in `unsafe_get_per_cpu!`...

Maybe in a few weeks I'll be able to take a closer look.

>> > +        // SAFETY: We only have one PerCpu that points at PERCPU
>> > +        unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut i64| {
>> 
>> Hmm I also don't like the unsafe part here...
>> 
>> Can't we use the same API that `thread_local!` in the standard library
>> has:
>> 
>>     https://doc.rust-lang.org/std/macro.thread_local.html
>> 
>> So in this example you would store a `Cell<i64>` instead.
>> 
>> I'm not familiar with per CPU variables, but if you're usually storing
>> `Copy` types, then this is much better wrt not having unsafe code
>> everywhere.
>> 
>> If one also often stores `!Copy` types, then we might be able to get
>> away with `RefCell`, but that's a small runtime overhead -- which is
>> probably bad given that per cpu variables are most likely used for
>> performance reasons? In that case the user might just need to store
>> `UnsafeCell` and use unsafe regardless. (or we invent something
>> specifically for that case, eg tokens that are statically known to be
>> unique etc)
>
> I'm open to including a specialization for `T: Copy` in a similar vein
> to what I have here for numeric types. Off the top of my head, that
> shouldn't require any user-facing `unsafe`. But yes, I believe there is
> a significant amount of interest in having `!Copy` per-CPU variables.
> (At least, I'm interested in having them around for experimenting with
> using Rust for HV drivers.)

What kinds of types would you like to store? Allocations? Just integers
in bigger structs? Mutexes?

> I would definitely like to avoid *requiring* the use of `RefCell` since,
> as you mention, it does have a runtime overhead. Per-CPU variables can
> be used for "logical" reasons rather than just as a performance
> optimization, so there might be some cases where paying the runtime
> overhead is ok. But that's certainly not true in all cases. That said,
> perhaps there could be a safely obtainable token type that only passes a
> `&T` (rather than a `&mut T`) to its closure, and then if a user doesn't
> mind the runtime overhead, they can choose `T` to be a `RefCell`.
> Thoughts?

So I think using an API similar to `thread_local!` will allow us to have
multiple other APIs that slot into that. `Cell<T>` for `T: Copy`,
`RefCell<T>` for cases where you don't care about the runtime overhead,
plain `T` for cases where you only need `&T`. For the case where you
need `&mut T`, we could have something like a `TokenCell<T>` that gives
out a token that you need to mutably borrow in order to get `&mut T`.
Finally for anything else that is too restricted by this, users can also
use `UnsafeCell<T>` although that requires `unsafe`.

I think the advantage of this is that the common cases are all safe and
very idiomatic. In the current design, you *always* have to use unsafe.

> For `UnsafeCell`, if a user of the API were to have something like a
> `PerCpu<UnsafeCell<T>>` that safely spits out a `&UnsafeCell<T>`, my
> understanding is that mutating the underlying `T` would require the
> exact same safety guarantees as what's here, except now it'd need a much
> bigger unsafe block and would have to do all of its manipulations via
> pointers. That seems like a pretty big ergonomics burden without a clear
> (to me) benefit.

It would require the same amount of unsafe & safety comments, but it
wouldn't be bigger comments, since you can just as well create `&mut T`
to the value.

---
Cheers,
Benno

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Boqun Feng 2 months, 3 weeks ago

On Tue, Jul 15, 2025 at 01:31:06PM +0200, Benno Lossin wrote:
[...]
> >> > +impl kernel::Module for PerCpuTestModule {
> >> > +    fn init(_module: &'static ThisModule) -> Result<Self, Error> {
> >> > +        pr_info!("rust percpu test start\n");
> >> > +
> >> > +        let mut native: i64 = 0;
> >> > +        // SAFETY: PERCPU is properly defined
> >> > +        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
> >> 
> >> I don't understand why we need unsafe here, can't we just create
> >> something specially in the `define_per_cpu` macro that is then confirmed
> >> by the `get_per_cpu!` macro and thus it can be safe?
> >
> > As is, something like
> >     define_per_cpu!(PERCPU: i32 = 0);
> >
> >     fn func() {
> >         let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
> >     }
> > will compile, but any usage of `pcpu` will be UB. This is because
> > `unsafe_get_per_cpu!` is just blindly casting pointers and, as far as I
> > know, the compiler does not do any checking of pointer casts. If you
> > have thoughts/ideas on how to get around this problem, I'd certainly
> > *like* to provide a safe API here :)
> 
> I haven't taken a look at your implementation, but you do have the type
> declared in `define_per_cpu!`, so it's a bit of a mystery to me why you
> can't get that out in `unsafe_get_per_cpu!`...
> 
> Maybe in a few weeks I'll be able to take a closer look.
> 
> >> > +        // SAFETY: We only have one PerCpu that points at PERCPU
> >> > +        unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut i64| {
> >> 
> >> Hmm I also don't like the unsafe part here...
> >> 
> >> Can't we use the same API that `thread_local!` in the standard library

First of all, `thread_local!` has to be implemented by some sys-specific
unsafe mechanism, right? For example on unix, I think it's using
pthread_key_t:

	https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html

what we are implementing (or wrapping) is the very basic unsafe
mechanism for percpu here. Surely we can explore the design for a safe
API, but the unsafe mechanism is probably necessary to look into at
first.

> >> has:
> >> 
> >>     https://doc.rust-lang.org/std/macro.thread_local.html
> >> 
> >> So in this example you would store a `Cell<i64>` instead.
> >> 
> >> I'm not familiar with per CPU variables, but if you're usually storing
> >> `Copy` types, then this is much better wrt not having unsafe code
> >> everywhere.
> >> 
> >> If one also often stores `!Copy` types, then we might be able to get
> >> away with `RefCell`, but that's a small runtime overhead -- which is
> >> probably bad given that per cpu variables are most likely used for
> >> performance reasons? In that case the user might just need to store
> >> `UnsafeCell` and use unsafe regardless. (or we invent something

This sounds reasonable to me.

> >> specifically for that case, eg tokens that are statically known to be
> >> unique etc)
> >
> > I'm open to including a specialization for `T: Copy` in a similar vein
> > to what I have here for numeric types. Off the top of my head, that
> > shouldn't require any user-facing `unsafe`. But yes, I believe there is
> > a significant amount of interest in having `!Copy` per-CPU variables.
> > (At least, I'm interested in having them around for experimenting with
> > using Rust for HV drivers.)
> 
> What kinds of types would you like to store? Allocations? Just integers
> in bigger structs? Mutexes?
> 

In the VMBus driver, there is a percpu work_struct.

> > I would definitely like to avoid *requiring* the use of `RefCell` since,
> > as you mention, it does have a runtime overhead. Per-CPU variables can
> > be used for "logical" reasons rather than just as a performance
> > optimization, so there might be some cases where paying the runtime
> > overhead is ok. But that's certainly not true in all cases. That said,
> > perhaps there could be a safely obtainable token type that only passes a
> > `&T` (rather than a `&mut T`) to its closure, and then if a user doesn't
> > mind the runtime overhead, they can choose `T` to be a `RefCell`.
> > Thoughts?
> 
> So I think using an API similar to `thread_local!` will allow us to have
> multiple other APIs that slot into that. `Cell<T>` for `T: Copy`,
> `RefCell<T>` for cases where you don't care about the runtime overhead,
> plain `T` for cases where you only need `&T`. For the case where you
> need `&mut T`, we could have something like a `TokenCell<T>` that gives
> out a token that you need to mutably borrow in order to get `&mut T`.
> Finally for anything else that is too restricted by this, users can also
> use `UnsafeCell<T>` although that requires `unsafe`.
> 
> I think the advantage of this is that the common cases are all safe and
> very idiomatic. In the current design, you *always* have to use unsafe.
> 

I agree, but like I said, we need to figure out the unsafe interface
that C already uses and build API upon it. I think focusing on the
unsafe mechanism may be the way to start: you cannot implement something
that cannot be implemented, and we don't have the magic pthread_key here
;-)

Regards,
Boqun

> > For `UnsafeCell`, if a user of the API were to have something like a
> > `PerCpu<UnsafeCell<T>>` that safely spits out a `&UnsafeCell<T>`, my
> > understanding is that mutating the underlying `T` would require the
> > exact same safety guarantees as what's here, except now it'd need a much
> > bigger unsafe block and would have to do all of its manipulations via
> > pointers. That seems like a pretty big ergonomics burden without a clear
> > (to me) benefit.
> 
> It would require the same amount of unsafe & safety comments, but it
> wouldn't be bigger comments, since you can just as well create `&mut T`
> to the value.
> 
> ---
> Cheers,
> Benno

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Benno Lossin 2 months, 3 weeks ago

On Tue Jul 15, 2025 at 4:10 PM CEST, Boqun Feng wrote:
> On Tue, Jul 15, 2025 at 01:31:06PM +0200, Benno Lossin wrote:
> [...]
>> >> > +impl kernel::Module for PerCpuTestModule {
>> >> > +    fn init(_module: &'static ThisModule) -> Result<Self, Error> {
>> >> > +        pr_info!("rust percpu test start\n");
>> >> > +
>> >> > +        let mut native: i64 = 0;
>> >> > +        // SAFETY: PERCPU is properly defined
>> >> > +        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
>> >> 
>> >> I don't understand why we need unsafe here, can't we just create
>> >> something specially in the `define_per_cpu` macro that is then confirmed
>> >> by the `get_per_cpu!` macro and thus it can be safe?
>> >
>> > As is, something like
>> >     define_per_cpu!(PERCPU: i32 = 0);
>> >
>> >     fn func() {
>> >         let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
>> >     }
>> > will compile, but any usage of `pcpu` will be UB. This is because
>> > `unsafe_get_per_cpu!` is just blindly casting pointers and, as far as I
>> > know, the compiler does not do any checking of pointer casts. If you
>> > have thoughts/ideas on how to get around this problem, I'd certainly
>> > *like* to provide a safe API here :)
>> 
>> I haven't taken a look at your implementation, but you do have the type
>> declared in `define_per_cpu!`, so it's a bit of a mystery to me why you
>> can't get that out in `unsafe_get_per_cpu!`...
>> 
>> Maybe in a few weeks I'll be able to take a closer look.
>> 
>> >> > +        // SAFETY: We only have one PerCpu that points at PERCPU
>> >> > +        unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut i64| {
>> >> 
>> >> Hmm I also don't like the unsafe part here...
>> >> 
>> >> Can't we use the same API that `thread_local!` in the standard library
>
> First of all, `thread_local!` has to be implemented by some sys-specific
> unsafe mechanism, right? For example on unix, I think it's using
> pthread_key_t:
>
> 	https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
>
> what we are implementing (or wrapping) is the very basic unsafe
> mechanism for percpu here. Surely we can explore the design for a safe
> API, but the unsafe mechanism is probably necessary to look into at
> first.

But this is intended to be used by drivers, right? If so, then we should
do our usual due diligence and work out a safe abstraction. Only fall
back to unsafe if it isn't possible.

I'm not familiar with percpu, but from the name I assumed that it's
"just a variable for each cpu" so similar to `thread_local!`, but it's
bound to the specific cpu instead of the thread.

That in my mind should be rather easy to support in Rust at least with
the thread_local-style API. You just need to ensure that no reference
can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
to detect context switches.

>> >> has:
>> >> 
>> >>     https://doc.rust-lang.org/std/macro.thread_local.html
>> >> 
>> >> So in this example you would store a `Cell<i64>` instead.
>> >> 
>> >> I'm not familiar with per CPU variables, but if you're usually storing
>> >> `Copy` types, then this is much better wrt not having unsafe code
>> >> everywhere.
>> >> 
>> >> If one also often stores `!Copy` types, then we might be able to get
>> >> away with `RefCell`, but that's a small runtime overhead -- which is
>> >> probably bad given that per cpu variables are most likely used for
>> >> performance reasons? In that case the user might just need to store
>> >> `UnsafeCell` and use unsafe regardless. (or we invent something
>
> This sounds reasonable to me.
>
>> >> specifically for that case, eg tokens that are statically known to be
>> >> unique etc)
>> >
>> > I'm open to including a specialization for `T: Copy` in a similar vein
>> > to what I have here for numeric types. Off the top of my head, that
>> > shouldn't require any user-facing `unsafe`. But yes, I believe there is
>> > a significant amount of interest in having `!Copy` per-CPU variables.
>> > (At least, I'm interested in having them around for experimenting with
>> > using Rust for HV drivers.)
>> 
>> What kinds of types would you like to store? Allocations? Just integers
>> in bigger structs? Mutexes?
>> 
>
> In the VMBus driver, there is a percpu work_struct.

Do you have a link? Or better yet a Rust struct description of what you
think it will look like :)

>> > I would definitely like to avoid *requiring* the use of `RefCell` since,
>> > as you mention, it does have a runtime overhead. Per-CPU variables can
>> > be used for "logical" reasons rather than just as a performance
>> > optimization, so there might be some cases where paying the runtime
>> > overhead is ok. But that's certainly not true in all cases. That said,
>> > perhaps there could be a safely obtainable token type that only passes a
>> > `&T` (rather than a `&mut T`) to its closure, and then if a user doesn't
>> > mind the runtime overhead, they can choose `T` to be a `RefCell`.
>> > Thoughts?
>> 
>> So I think using an API similar to `thread_local!` will allow us to have
>> multiple other APIs that slot into that. `Cell<T>` for `T: Copy`,
>> `RefCell<T>` for cases where you don't care about the runtime overhead,
>> plain `T` for cases where you only need `&T`. For the case where you
>> need `&mut T`, we could have something like a `TokenCell<T>` that gives
>> out a token that you need to mutably borrow in order to get `&mut T`.
>> Finally for anything else that is too restricted by this, users can also
>> use `UnsafeCell<T>` although that requires `unsafe`.
>> 
>> I think the advantage of this is that the common cases are all safe and
>> very idiomatic. In the current design, you *always* have to use unsafe.
>> 
>
> I agree, but like I said, we need to figure out the unsafe interface
> that C already uses and build API upon it. I think focusing on the
> unsafe mechanism may be the way to start: you cannot implement something
> that cannot be implemented, and we don't have the magic pthread_key here
> ;-)

Sure we can do some experimentation, but I don't think we should put
unsafe abstractions upstream that we intend to replace with a safe
abstraction later. Otherwise people are going to depend on it and it's
going to be a mess. Do the experimenting out of tree and learn there.

---
Cheers,
Benno

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Boqun Feng 2 months, 3 weeks ago

On Tue, Jul 15, 2025 at 05:55:13PM +0200, Benno Lossin wrote:
> On Tue Jul 15, 2025 at 4:10 PM CEST, Boqun Feng wrote:
> > On Tue, Jul 15, 2025 at 01:31:06PM +0200, Benno Lossin wrote:
> > [...]
> >> >> > +impl kernel::Module for PerCpuTestModule {
> >> >> > +    fn init(_module: &'static ThisModule) -> Result<Self, Error> {
> >> >> > +        pr_info!("rust percpu test start\n");
> >> >> > +
> >> >> > +        let mut native: i64 = 0;
> >> >> > +        // SAFETY: PERCPU is properly defined
> >> >> > +        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
> >> >> 
> >> >> I don't understand why we need unsafe here, can't we just create
> >> >> something specially in the `define_per_cpu` macro that is then confirmed
> >> >> by the `get_per_cpu!` macro and thus it can be safe?
> >> >
> >> > As is, something like
> >> >     define_per_cpu!(PERCPU: i32 = 0);
> >> >
> >> >     fn func() {
> >> >         let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
> >> >     }
> >> > will compile, but any usage of `pcpu` will be UB. This is because
> >> > `unsafe_get_per_cpu!` is just blindly casting pointers and, as far as I
> >> > know, the compiler does not do any checking of pointer casts. If you
> >> > have thoughts/ideas on how to get around this problem, I'd certainly
> >> > *like* to provide a safe API here :)
> >> 
> >> I haven't taken a look at your implementation, but you do have the type
> >> declared in `define_per_cpu!`, so it's a bit of a mystery to me why you
> >> can't get that out in `unsafe_get_per_cpu!`...
> >> 
> >> Maybe in a few weeks I'll be able to take a closer look.
> >> 
> >> >> > +        // SAFETY: We only have one PerCpu that points at PERCPU
> >> >> > +        unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut i64| {
> >> >> 
> >> >> Hmm I also don't like the unsafe part here...
> >> >> 
> >> >> Can't we use the same API that `thread_local!` in the standard library
> >
> > First of all, `thread_local!` has to be implemented by some sys-specific
> > unsafe mechanism, right? For example on unix, I think it's using
> > pthread_key_t:
> >
> > 	https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
> >
> > what we are implementing (or wrapping) is the very basic unsafe
> > mechanism for percpu here. Surely we can explore the design for a safe
> > API, but the unsafe mechanism is probably necessary to look into at
> > first.
> 
> But this is intended to be used by drivers, right? If so, then we should

Not necessarily only for drivers, we can also use it for implementing
other safe abstraction (e.g. hazard pointers, percpu counters etc)

> do our usual due diligence and work out a safe abstraction. Only fall
> back to unsafe if it isn't possible.
> 

All I'm saying is instead of figuring out a safe abstraction at first,
we should probably focus on identifying how to implement it and which
part is really unsafe and the safety requirement for that.

> I'm not familiar with percpu, but from the name I assumed that it's
> "just a variable for each cpu" so similar to `thread_local!`, but it's
> bound to the specific cpu instead of the thread.
> 
> That in my mind should be rather easy to support in Rust at least with
> the thread_local-style API. You just need to ensure that no reference
> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint

Not really, in kernel, we have plenty of use cases that we read the
other CPU's percpu variables. For example, each CPU keeps it's own
counter and we sum them other in another CPU.

If we would like to model it conceptually, it's more like an array
that's index by CpuId to me.

> to detect context switches.
> 
> >> >> has:
> >> >> 
> >> >>     https://doc.rust-lang.org/std/macro.thread_local.html
> >> >> 
> >> >> So in this example you would store a `Cell<i64>` instead.
> >> >> 
> >> >> I'm not familiar with per CPU variables, but if you're usually storing
> >> >> `Copy` types, then this is much better wrt not having unsafe code
> >> >> everywhere.
> >> >> 
> >> >> If one also often stores `!Copy` types, then we might be able to get
> >> >> away with `RefCell`, but that's a small runtime overhead -- which is
> >> >> probably bad given that per cpu variables are most likely used for
> >> >> performance reasons? In that case the user might just need to store
> >> >> `UnsafeCell` and use unsafe regardless. (or we invent something
> >
> > This sounds reasonable to me.
> >
> >> >> specifically for that case, eg tokens that are statically known to be
> >> >> unique etc)
> >> >
> >> > I'm open to including a specialization for `T: Copy` in a similar vein
> >> > to what I have here for numeric types. Off the top of my head, that
> >> > shouldn't require any user-facing `unsafe`. But yes, I believe there is
> >> > a significant amount of interest in having `!Copy` per-CPU variables.
> >> > (At least, I'm interested in having them around for experimenting with
> >> > using Rust for HV drivers.)
> >> 
> >> What kinds of types would you like to store? Allocations? Just integers
> >> in bigger structs? Mutexes?
> >> 
> >
> > In the VMBus driver, there is a percpu work_struct.
> 
> Do you have a link? Or better yet a Rust struct description of what you
> think it will look like :)
> 

Not Rust code yet, but here is the corresponding C code:

	https://github.com/Rust-for-Linux/linux/blob/rust-next/drivers/hv/vmbus_drv.c#L1396

But please note that we are not solely developing the abstraction for
this usage, but more for generally understand how to wrap percpu
functionality similar to the usage in C.

> >> > I would definitely like to avoid *requiring* the use of `RefCell` since,
> >> > as you mention, it does have a runtime overhead. Per-CPU variables can
> >> > be used for "logical" reasons rather than just as a performance
> >> > optimization, so there might be some cases where paying the runtime
> >> > overhead is ok. But that's certainly not true in all cases. That said,
> >> > perhaps there could be a safely obtainable token type that only passes a
> >> > `&T` (rather than a `&mut T`) to its closure, and then if a user doesn't
> >> > mind the runtime overhead, they can choose `T` to be a `RefCell`.
> >> > Thoughts?
> >> 
> >> So I think using an API similar to `thread_local!` will allow us to have
> >> multiple other APIs that slot into that. `Cell<T>` for `T: Copy`,
> >> `RefCell<T>` for cases where you don't care about the runtime overhead,
> >> plain `T` for cases where you only need `&T`. For the case where you
> >> need `&mut T`, we could have something like a `TokenCell<T>` that gives
> >> out a token that you need to mutably borrow in order to get `&mut T`.
> >> Finally for anything else that is too restricted by this, users can also
> >> use `UnsafeCell<T>` although that requires `unsafe`.
> >> 
> >> I think the advantage of this is that the common cases are all safe and
> >> very idiomatic. In the current design, you *always* have to use unsafe.
> >> 
> >
> > I agree, but like I said, we need to figure out the unsafe interface
> > that C already uses and build API upon it. I think focusing on the
> > unsafe mechanism may be the way to start: you cannot implement something
> > that cannot be implemented, and we don't have the magic pthread_key here
> > ;-)
> 
> Sure we can do some experimentation, but I don't think we should put
> unsafe abstractions upstream that we intend to replace with a safe
> abstraction later. Otherwise people are going to depend on it and it's

I doubt we can replace the unsafe abstraction with a safe one, if users
really care the performance then they would really need to use some
unsafe API to build their safe abstraction.

> going to be a mess. Do the experimenting out of tree and learn there.

I disagree, Rust as a language its own should be able to do what C does
including being able to implement the percpu functionality same as C,
there is nothing wrong with a set of Rust primitives in the kernel that
provides fundamental percpu functionality the other core facilities can
rely on. The better part is that it will have all the safety requirement
documented well.

Regards,
Boqun

> 
> ---
> Cheers,
> Benno

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Benno Lossin 2 months, 3 weeks ago

On Tue Jul 15, 2025 at 6:31 PM CEST, Boqun Feng wrote:
> On Tue, Jul 15, 2025 at 05:55:13PM +0200, Benno Lossin wrote:
>> On Tue Jul 15, 2025 at 4:10 PM CEST, Boqun Feng wrote:
>> > On Tue, Jul 15, 2025 at 01:31:06PM +0200, Benno Lossin wrote:
>> > [...]
>> >> >> > +impl kernel::Module for PerCpuTestModule {
>> >> >> > +    fn init(_module: &'static ThisModule) -> Result<Self, Error> {
>> >> >> > +        pr_info!("rust percpu test start\n");
>> >> >> > +
>> >> >> > +        let mut native: i64 = 0;
>> >> >> > +        // SAFETY: PERCPU is properly defined
>> >> >> > +        let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
>> >> >> 
>> >> >> I don't understand why we need unsafe here, can't we just create
>> >> >> something specially in the `define_per_cpu` macro that is then confirmed
>> >> >> by the `get_per_cpu!` macro and thus it can be safe?
>> >> >
>> >> > As is, something like
>> >> >     define_per_cpu!(PERCPU: i32 = 0);
>> >> >
>> >> >     fn func() {
>> >> >         let mut pcpu: StaticPerCpu<i64> = unsafe { unsafe_get_per_cpu!(PERCPU) };
>> >> >     }
>> >> > will compile, but any usage of `pcpu` will be UB. This is because
>> >> > `unsafe_get_per_cpu!` is just blindly casting pointers and, as far as I
>> >> > know, the compiler does not do any checking of pointer casts. If you
>> >> > have thoughts/ideas on how to get around this problem, I'd certainly
>> >> > *like* to provide a safe API here :)
>> >> 
>> >> I haven't taken a look at your implementation, but you do have the type
>> >> declared in `define_per_cpu!`, so it's a bit of a mystery to me why you
>> >> can't get that out in `unsafe_get_per_cpu!`...
>> >> 
>> >> Maybe in a few weeks I'll be able to take a closer look.
>> >> 
>> >> >> > +        // SAFETY: We only have one PerCpu that points at PERCPU
>> >> >> > +        unsafe { pcpu.get(CpuGuard::new()) }.with(|val: &mut i64| {
>> >> >> 
>> >> >> Hmm I also don't like the unsafe part here...
>> >> >> 
>> >> >> Can't we use the same API that `thread_local!` in the standard library
>> >
>> > First of all, `thread_local!` has to be implemented by some sys-specific
>> > unsafe mechanism, right? For example on unix, I think it's using
>> > pthread_key_t:
>> >
>> > 	https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
>> >
>> > what we are implementing (or wrapping) is the very basic unsafe
>> > mechanism for percpu here. Surely we can explore the design for a safe
>> > API, but the unsafe mechanism is probably necessary to look into at
>> > first.
>> 
>> But this is intended to be used by drivers, right? If so, then we should
>
> Not necessarily only for drivers, we can also use it for implementing
> other safe abstraction (e.g. hazard pointers, percpu counters etc)

That's fair, but then it should be `pub(crate)`.

>> do our usual due diligence and work out a safe abstraction. Only fall
>> back to unsafe if it isn't possible.
>> 
>
> All I'm saying is instead of figuring out a safe abstraction at first,
> we should probably focus on identifying how to implement it and which
> part is really unsafe and the safety requirement for that.

Yeah. But then we should do that before merging :)

>> I'm not familiar with percpu, but from the name I assumed that it's
>> "just a variable for each cpu" so similar to `thread_local!`, but it's
>> bound to the specific cpu instead of the thread.
>> 
>> That in my mind should be rather easy to support in Rust at least with
>> the thread_local-style API. You just need to ensure that no reference
>> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
>
> Not really, in kernel, we have plenty of use cases that we read the
> other CPU's percpu variables. For example, each CPU keeps it's own
> counter and we sum them other in another CPU.

But then you need some sort of synchronization?

> If we would like to model it conceptually, it's more like an array
> that's index by CpuId to me.

Gotcha, but this model is missing the access control/synchronization. So
I'm not so sure how useful it is.

(I think I asked this somewhere else, but the number of CPUs doesn't
change, right?)

>> to detect context switches.
>> 
>> >> >> has:
>> >> >> 
>> >> >>     https://doc.rust-lang.org/std/macro.thread_local.html
>> >> >> 
>> >> >> So in this example you would store a `Cell<i64>` instead.
>> >> >> 
>> >> >> I'm not familiar with per CPU variables, but if you're usually storing
>> >> >> `Copy` types, then this is much better wrt not having unsafe code
>> >> >> everywhere.
>> >> >> 
>> >> >> If one also often stores `!Copy` types, then we might be able to get
>> >> >> away with `RefCell`, but that's a small runtime overhead -- which is
>> >> >> probably bad given that per cpu variables are most likely used for
>> >> >> performance reasons? In that case the user might just need to store
>> >> >> `UnsafeCell` and use unsafe regardless. (or we invent something
>> >
>> > This sounds reasonable to me.
>> >
>> >> >> specifically for that case, eg tokens that are statically known to be
>> >> >> unique etc)
>> >> >
>> >> > I'm open to including a specialization for `T: Copy` in a similar vein
>> >> > to what I have here for numeric types. Off the top of my head, that
>> >> > shouldn't require any user-facing `unsafe`. But yes, I believe there is
>> >> > a significant amount of interest in having `!Copy` per-CPU variables.
>> >> > (At least, I'm interested in having them around for experimenting with
>> >> > using Rust for HV drivers.)
>> >> 
>> >> What kinds of types would you like to store? Allocations? Just integers
>> >> in bigger structs? Mutexes?
>> >> 
>> >
>> > In the VMBus driver, there is a percpu work_struct.
>> 
>> Do you have a link? Or better yet a Rust struct description of what you
>> think it will look like :)
>> 
>
> Not Rust code yet, but here is the corresponding C code:
>
> 	https://github.com/Rust-for-Linux/linux/blob/rust-next/drivers/hv/vmbus_drv.c#L1396

Thanks!

> But please note that we are not solely developing the abstraction for
> this usage, but more for generally understand how to wrap percpu
> functionality similar to the usage in C.

Well, I have to start somewhere for looking at the use-cases :)

If you have more, just let me see. (probably won't have enough time to
look at them now, but maybe in a couple weeks)

>> >> > I would definitely like to avoid *requiring* the use of `RefCell` since,
>> >> > as you mention, it does have a runtime overhead. Per-CPU variables can
>> >> > be used for "logical" reasons rather than just as a performance
>> >> > optimization, so there might be some cases where paying the runtime
>> >> > overhead is ok. But that's certainly not true in all cases. That said,
>> >> > perhaps there could be a safely obtainable token type that only passes a
>> >> > `&T` (rather than a `&mut T`) to its closure, and then if a user doesn't
>> >> > mind the runtime overhead, they can choose `T` to be a `RefCell`.
>> >> > Thoughts?
>> >> 
>> >> So I think using an API similar to `thread_local!` will allow us to have
>> >> multiple other APIs that slot into that. `Cell<T>` for `T: Copy`,
>> >> `RefCell<T>` for cases where you don't care about the runtime overhead,
>> >> plain `T` for cases where you only need `&T`. For the case where you
>> >> need `&mut T`, we could have something like a `TokenCell<T>` that gives
>> >> out a token that you need to mutably borrow in order to get `&mut T`.
>> >> Finally for anything else that is too restricted by this, users can also
>> >> use `UnsafeCell<T>` although that requires `unsafe`.
>> >> 
>> >> I think the advantage of this is that the common cases are all safe and
>> >> very idiomatic. In the current design, you *always* have to use unsafe.
>> >> 
>> >
>> > I agree, but like I said, we need to figure out the unsafe interface
>> > that C already uses and build API upon it. I think focusing on the
>> > unsafe mechanism may be the way to start: you cannot implement something
>> > that cannot be implemented, and we don't have the magic pthread_key here
>> > ;-)
>> 
>> Sure we can do some experimentation, but I don't think we should put
>> unsafe abstractions upstream that we intend to replace with a safe
>> abstraction later. Otherwise people are going to depend on it and it's
>
> I doubt we can replace the unsafe abstraction with a safe one, if users
> really care the performance then they would really need to use some
> unsafe API to build their safe abstraction.

That sounds pretty pessimistic, why do you think that?

>> going to be a mess. Do the experimenting out of tree and learn there.
>
> I disagree, Rust as a language its own should be able to do what C does
> including being able to implement the percpu functionality same as C,
> there is nothing wrong with a set of Rust primitives in the kernel that
> provides fundamental percpu functionality the other core facilities can
> rely on. The better part is that it will have all the safety requirement
> documented well.

Sure, but we haven't even tried to make it safe, so I don't think we
should add them now in this state.

---
Cheers,
Benno

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Boqun Feng 2 months, 3 weeks ago

On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
[...]
> >> 
> >> Sure we can do some experimentation, but I don't think we should put
> >> unsafe abstractions upstream that we intend to replace with a safe
> >> abstraction later. Otherwise people are going to depend on it and it's
> >
> > I doubt we can replace the unsafe abstraction with a safe one, if users
> > really care the performance then they would really need to use some
> > unsafe API to build their safe abstraction.
> 
> That sounds pretty pessimistic, why do you think that?
> 

I could ask you the similar, you barely know the implementation and
usage the percpu, why do you think it's possible? ;-)

Regards,
Boqun

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Boqun Feng 2 months, 3 weeks ago

On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
[...]
> >> >
> >> > First of all, `thread_local!` has to be implemented by some sys-specific
> >> > unsafe mechanism, right? For example on unix, I think it's using
> >> > pthread_key_t:
> >> >
> >> > 	https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
> >> >
> >> > what we are implementing (or wrapping) is the very basic unsafe
> >> > mechanism for percpu here. Surely we can explore the design for a safe
> >> > API, but the unsafe mechanism is probably necessary to look into at
> >> > first.
> >> 
> >> But this is intended to be used by drivers, right? If so, then we should
> >
> > Not necessarily only for drivers, we can also use it for implementing
> > other safe abstraction (e.g. hazard pointers, percpu counters etc)
> 
> That's fair, but then it should be `pub(crate)`.
> 

Fine by me, but please see below.

> >> do our usual due diligence and work out a safe abstraction. Only fall
> >> back to unsafe if it isn't possible.
> >> 
> >
> > All I'm saying is instead of figuring out a safe abstraction at first,
> > we should probably focus on identifying how to implement it and which
> > part is really unsafe and the safety requirement for that.
> 
> Yeah. But then we should do that before merging :)
> 

Well, who's talknig about merging? ;-) I thought we just began reviewing
here ;-)

> >> I'm not familiar with percpu, but from the name I assumed that it's
> >> "just a variable for each cpu" so similar to `thread_local!`, but it's
> >> bound to the specific cpu instead of the thread.
> >> 
> >> That in my mind should be rather easy to support in Rust at least with
> >> the thread_local-style API. You just need to ensure that no reference
> >> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
> >
> > Not really, in kernel, we have plenty of use cases that we read the
> > other CPU's percpu variables. For example, each CPU keeps it's own
> > counter and we sum them other in another CPU.
> 
> But then you need some sort of synchronization?
> 

Right, but the synchronization can exist either in the percpu operations
themselves or outside the percpu operations. Some cases, the data types
are small enough to fit in atomic data types, and operations are just
load/store/cmpxchg etc, then operations on the current cpu and remote
read will be naturally synchronized. Sometimes extra synchronization is
needed.

Keyword find all these cases are `per_cpu_ptr()`:

	https://elixir.bootlin.com/linux/v6.15.6/A/ident/per_cpu_ptr

> > If we would like to model it conceptually, it's more like an array
> > that's index by CpuId to me.
> 
> Gotcha, but this model is missing the access control/synchronization. So
> I'm not so sure how useful it is.
> 
> (I think I asked this somewhere else, but the number of CPUs doesn't
> change, right?)
> 

In terms of percpu variable, yes. A percpu variable is even available
for an offline CPU.

> >> to detect context switches.
> >> 
> >> >> >> has:
> >> >> >> 
> >> >> >>     https://doc.rust-lang.org/std/macro.thread_local.html
> >> >> >> 
> >> >> >> So in this example you would store a `Cell<i64>` instead.
> >> >> >> 
> >> >> >> I'm not familiar with per CPU variables, but if you're usually storing
> >> >> >> `Copy` types, then this is much better wrt not having unsafe code
> >> >> >> everywhere.
> >> >> >> 
> >> >> >> If one also often stores `!Copy` types, then we might be able to get
> >> >> >> away with `RefCell`, but that's a small runtime overhead -- which is
> >> >> >> probably bad given that per cpu variables are most likely used for
> >> >> >> performance reasons? In that case the user might just need to store
> >> >> >> `UnsafeCell` and use unsafe regardless. (or we invent something
> >> >
> >> > This sounds reasonable to me.
> >> >
> >> >> >> specifically for that case, eg tokens that are statically known to be
> >> >> >> unique etc)
> >> >> >
> >> >> > I'm open to including a specialization for `T: Copy` in a similar vein
> >> >> > to what I have here for numeric types. Off the top of my head, that
> >> >> > shouldn't require any user-facing `unsafe`. But yes, I believe there is
> >> >> > a significant amount of interest in having `!Copy` per-CPU variables.
> >> >> > (At least, I'm interested in having them around for experimenting with
> >> >> > using Rust for HV drivers.)
> >> >> 
> >> >> What kinds of types would you like to store? Allocations? Just integers
> >> >> in bigger structs? Mutexes?
> >> >> 
> >> >
> >> > In the VMBus driver, there is a percpu work_struct.
> >> 
> >> Do you have a link? Or better yet a Rust struct description of what you
> >> think it will look like :)
> >> 
> >
> > Not Rust code yet, but here is the corresponding C code:
> >
> > 	https://github.com/Rust-for-Linux/linux/blob/rust-next/drivers/hv/vmbus_drv.c#L1396
> 
> Thanks!
> 
> > But please note that we are not solely developing the abstraction for
> > this usage, but more for generally understand how to wrap percpu
> > functionality similar to the usage in C.
> 
> Well, I have to start somewhere for looking at the use-cases :)
> 
> If you have more, just let me see. (probably won't have enough time to
> look at them now, but maybe in a couple weeks)
> 

If you have time, feel free to take a look at hazard pointers;

https://lore.kernel.org/lkml/20240917143402.930114-1-boqun.feng@gmail.com/
https://lore.kernel.org/lkml/20250625031101.12555-1-boqun.feng@gmail.com/

You can also take a look at existing usage of percpu, e.g. SRCU uses to
track how many readers are active:

	https://elixir.bootlin.com/linux/v6.15.6/source/kernel/rcu/srcutree.c#L577

[...]

Regards,
Boqun

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Benno Lossin 2 months, 3 weeks ago

On Tue Jul 15, 2025 at 11:34 PM CEST, Boqun Feng wrote:
> On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
> [...]
>> >> >
>> >> > First of all, `thread_local!` has to be implemented by some sys-specific
>> >> > unsafe mechanism, right? For example on unix, I think it's using
>> >> > pthread_key_t:
>> >> >
>> >> > 	https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
>> >> >
>> >> > what we are implementing (or wrapping) is the very basic unsafe
>> >> > mechanism for percpu here. Surely we can explore the design for a safe
>> >> > API, but the unsafe mechanism is probably necessary to look into at
>> >> > first.
>> >> 
>> >> But this is intended to be used by drivers, right? If so, then we should
>> >
>> > Not necessarily only for drivers, we can also use it for implementing
>> > other safe abstraction (e.g. hazard pointers, percpu counters etc)
>> 
>> That's fair, but then it should be `pub(crate)`.
>> 
>
> Fine by me, but please see below.
>
>> >> do our usual due diligence and work out a safe abstraction. Only fall
>> >> back to unsafe if it isn't possible.
>> >> 
>> >
>> > All I'm saying is instead of figuring out a safe abstraction at first,
>> > we should probably focus on identifying how to implement it and which
>> > part is really unsafe and the safety requirement for that.
>> 
>> Yeah. But then we should do that before merging :)
>> 
>
> Well, who's talknig about merging? ;-) I thought we just began reviewing
> here ;-)

I understand [PATCH] emails as "I want to merge this" and [RFC PATCH] as
"I want to talk about merging this". It might be that I haven't seen the
RFC patch series, because I often mute those.

>> >> I'm not familiar with percpu, but from the name I assumed that it's
>> >> "just a variable for each cpu" so similar to `thread_local!`, but it's
>> >> bound to the specific cpu instead of the thread.
>> >> 
>> >> That in my mind should be rather easy to support in Rust at least with
>> >> the thread_local-style API. You just need to ensure that no reference
>> >> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
>> >
>> > Not really, in kernel, we have plenty of use cases that we read the
>> > other CPU's percpu variables. For example, each CPU keeps it's own
>> > counter and we sum them other in another CPU.
>> 
>> But then you need some sort of synchronization?
>> 
>
> Right, but the synchronization can exist either in the percpu operations
> themselves or outside the percpu operations. Some cases, the data types
> are small enough to fit in atomic data types, and operations are just
> load/store/cmpxchg etc, then operations on the current cpu and remote
> read will be naturally synchronized. Sometimes extra synchronization is
> needed.

Sure, so we probably want direct atomics support. What about "extra
synchronization"? Is that using locks or RCU or what else?

> Keyword find all these cases are `per_cpu_ptr()`:
>
> 	https://elixir.bootlin.com/linux/v6.15.6/A/ident/per_cpu_ptr

Could you explain to me how to find them? I can either click on one of
the files with horrible C preprocessor macros or the auto-completion in
the search bar. But that one only shows 3 suggestions `_hyp_sym`,
`_nvhe_sym` and `_to_phys` which doesn't really mean much to me.

---
Cheers,
Benno

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Boqun Feng 2 months, 3 weeks ago

On Wed, Jul 16, 2025 at 12:32:04PM +0200, Benno Lossin wrote:
> On Tue Jul 15, 2025 at 11:34 PM CEST, Boqun Feng wrote:
> > On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
> > [...]
> >> >> >
> >> >> > First of all, `thread_local!` has to be implemented by some sys-specific
> >> >> > unsafe mechanism, right? For example on unix, I think it's using
> >> >> > pthread_key_t:
> >> >> >
> >> >> > 	https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
> >> >> >
> >> >> > what we are implementing (or wrapping) is the very basic unsafe
> >> >> > mechanism for percpu here. Surely we can explore the design for a safe
> >> >> > API, but the unsafe mechanism is probably necessary to look into at
> >> >> > first.
> >> >> 
> >> >> But this is intended to be used by drivers, right? If so, then we should
> >> >
> >> > Not necessarily only for drivers, we can also use it for implementing
> >> > other safe abstraction (e.g. hazard pointers, percpu counters etc)
> >> 
> >> That's fair, but then it should be `pub(crate)`.
> >> 
> >
> > Fine by me, but please see below.
> >
> >> >> do our usual due diligence and work out a safe abstraction. Only fall
> >> >> back to unsafe if it isn't possible.
> >> >> 
> >> >
> >> > All I'm saying is instead of figuring out a safe abstraction at first,
> >> > we should probably focus on identifying how to implement it and which
> >> > part is really unsafe and the safety requirement for that.
> >> 
> >> Yeah. But then we should do that before merging :)
> >> 
> >
> > Well, who's talknig about merging? ;-) I thought we just began reviewing
> > here ;-)
> 
> I understand [PATCH] emails as "I want to merge this" and [RFC PATCH] as

But it doesn't mean "merge as it is", right? I don't think either I or
Mitchell implied that, I'm surprised that you had to mention that, also
based on "I often mute those" below, making it "[PATCH]" seems to be a
practical way to get more attention if one wants to get some reviews.

> "I want to talk about merging this". It might be that I haven't seen the
> RFC patch series, because I often mute those.
> 

Well, then you cannot blame people to move from "RFC PATCH" to "PATCH"
stage for more reviews, right? And you cannot make rules about what the
difference between [PATCH] and [RFC PATCH] if you ignore one of them ;-)

> >> >> I'm not familiar with percpu, but from the name I assumed that it's
> >> >> "just a variable for each cpu" so similar to `thread_local!`, but it's
> >> >> bound to the specific cpu instead of the thread.
> >> >> 
> >> >> That in my mind should be rather easy to support in Rust at least with
> >> >> the thread_local-style API. You just need to ensure that no reference
> >> >> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
> >> >
> >> > Not really, in kernel, we have plenty of use cases that we read the
> >> > other CPU's percpu variables. For example, each CPU keeps it's own
> >> > counter and we sum them other in another CPU.
> >> 
> >> But then you need some sort of synchronization?
> >> 
> >
> > Right, but the synchronization can exist either in the percpu operations
> > themselves or outside the percpu operations. Some cases, the data types
> > are small enough to fit in atomic data types, and operations are just
> > load/store/cmpxchg etc, then operations on the current cpu and remote
> > read will be naturally synchronized. Sometimes extra synchronization is
> > needed.
> 
> Sure, so we probably want direct atomics support. What about "extra
> synchronization"? Is that using locks or RCU or what else?
> 

It's up to the users obviously, It could be some sort of locking or RCU,
it's case by case.

> > Keyword find all these cases are `per_cpu_ptr()`:
> >
> > 	https://elixir.bootlin.com/linux/v6.15.6/A/ident/per_cpu_ptr
> 
> Could you explain to me how to find them? I can either click on one of
> the files with horrible C preprocessor macros or the auto-completion in
> the search bar. But that one only shows 3 suggestions `_hyp_sym`,
> `_nvhe_sym` and `_to_phys` which doesn't really mean much to me.
> 

You need to find the usage of `per_cpu_ptr()`, which is a function that
gives you a pointer to a percpu variable on the other CPU, and then
that's usually the case where a "remote" read of percpu variable
happens.

Regards,
Boqun

> ---
> Cheers,
> Benno

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Benno Lossin 2 months, 3 weeks ago

On Wed Jul 16, 2025 at 5:33 PM CEST, Boqun Feng wrote:
> On Wed, Jul 16, 2025 at 12:32:04PM +0200, Benno Lossin wrote:
>> On Tue Jul 15, 2025 at 11:34 PM CEST, Boqun Feng wrote:
>> > On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
>> > [...]
>> >> >> >
>> >> >> > First of all, `thread_local!` has to be implemented by some sys-specific
>> >> >> > unsafe mechanism, right? For example on unix, I think it's using
>> >> >> > pthread_key_t:
>> >> >> >
>> >> >> > 	https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
>> >> >> >
>> >> >> > what we are implementing (or wrapping) is the very basic unsafe
>> >> >> > mechanism for percpu here. Surely we can explore the design for a safe
>> >> >> > API, but the unsafe mechanism is probably necessary to look into at
>> >> >> > first.
>> >> >> 
>> >> >> But this is intended to be used by drivers, right? If so, then we should
>> >> >
>> >> > Not necessarily only for drivers, we can also use it for implementing
>> >> > other safe abstraction (e.g. hazard pointers, percpu counters etc)
>> >> 
>> >> That's fair, but then it should be `pub(crate)`.
>> >> 
>> >
>> > Fine by me, but please see below.
>> >
>> >> >> do our usual due diligence and work out a safe abstraction. Only fall
>> >> >> back to unsafe if it isn't possible.
>> >> >> 
>> >> >
>> >> > All I'm saying is instead of figuring out a safe abstraction at first,
>> >> > we should probably focus on identifying how to implement it and which
>> >> > part is really unsafe and the safety requirement for that.
>> >> 
>> >> Yeah. But then we should do that before merging :)
>> >> 
>> >
>> > Well, who's talknig about merging? ;-) I thought we just began reviewing
>> > here ;-)
>> 
>> I understand [PATCH] emails as "I want to merge this" and [RFC PATCH] as
>
> But it doesn't mean "merge as it is", right? I don't think either I or
> Mitchell implied that, I'm surprised that you had to mention that,

Yeah that is true, but it at least shows the intention :)

> also based on "I often mute those" below, making it "[PATCH]" seems to
> be a practical way to get more attention if one wants to get some
> reviews.

That is true, I do usually read the titles of RFC patches though and
sometimes take a look eg your atomics series.

>> "I want to talk about merging this". It might be that I haven't seen the
>> RFC patch series, because I often mute those.
>> 
>
> Well, then you cannot blame people to move from "RFC PATCH" to "PATCH"
> stage for more reviews, right? And you cannot make rules about what the
> difference between [PATCH] and [RFC PATCH] if you ignore one of them ;-)

I'm not trying to blame anyone. I saw a lot of unsafe in the example and
thought "we can do better" and since I haven't heard any sufficient
arguments showing that it's impossible to improve, we should do some
design work.

>> >> >> I'm not familiar with percpu, but from the name I assumed that it's
>> >> >> "just a variable for each cpu" so similar to `thread_local!`, but it's
>> >> >> bound to the specific cpu instead of the thread.
>> >> >> 
>> >> >> That in my mind should be rather easy to support in Rust at least with
>> >> >> the thread_local-style API. You just need to ensure that no reference
>> >> >> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
>> >> >
>> >> > Not really, in kernel, we have plenty of use cases that we read the
>> >> > other CPU's percpu variables. For example, each CPU keeps it's own
>> >> > counter and we sum them other in another CPU.
>> >> 
>> >> But then you need some sort of synchronization?
>> >> 
>> >
>> > Right, but the synchronization can exist either in the percpu operations
>> > themselves or outside the percpu operations. Some cases, the data types
>> > are small enough to fit in atomic data types, and operations are just
>> > load/store/cmpxchg etc, then operations on the current cpu and remote
>> > read will be naturally synchronized. Sometimes extra synchronization is
>> > needed.
>> 
>> Sure, so we probably want direct atomics support. What about "extra
>> synchronization"? Is that using locks or RCU or what else?
>> 
>
> It's up to the users obviously, It could be some sort of locking or RCU,
> it's case by case.

Makes sense, what do you need in the VMS driver?

>> > Keyword find all these cases are `per_cpu_ptr()`:
>> >
>> > 	https://elixir.bootlin.com/linux/v6.15.6/A/ident/per_cpu_ptr
>> 
>> Could you explain to me how to find them? I can either click on one of
>> the files with horrible C preprocessor macros or the auto-completion in
>> the search bar. But that one only shows 3 suggestions `_hyp_sym`,
>> `_nvhe_sym` and `_to_phys` which doesn't really mean much to me.
>> 
>
> You need to find the usage of `per_cpu_ptr()`, which is a function that
> gives you a pointer to a percpu variable on the other CPU, and then
> that's usually the case where a "remote" read of percpu variable
> happens.

Ahh gotcha, I thought you pointed me to some definitions of operations
on percpu pointers.

---
Cheers,
Benno

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Boqun Feng 2 months, 3 weeks ago

On Wed, Jul 16, 2025 at 07:21:32PM +0200, Benno Lossin wrote:
> On Wed Jul 16, 2025 at 5:33 PM CEST, Boqun Feng wrote:
> > On Wed, Jul 16, 2025 at 12:32:04PM +0200, Benno Lossin wrote:
> >> On Tue Jul 15, 2025 at 11:34 PM CEST, Boqun Feng wrote:
> >> > On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
> >> > [...]
> >> >> >> >
> >> >> >> > First of all, `thread_local!` has to be implemented by some sys-specific
> >> >> >> > unsafe mechanism, right? For example on unix, I think it's using
> >> >> >> > pthread_key_t:
> >> >> >> >
> >> >> >> > 	https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
> >> >> >> >
> >> >> >> > what we are implementing (or wrapping) is the very basic unsafe
> >> >> >> > mechanism for percpu here. Surely we can explore the design for a safe
> >> >> >> > API, but the unsafe mechanism is probably necessary to look into at
> >> >> >> > first.
> >> >> >> 
> >> >> >> But this is intended to be used by drivers, right? If so, then we should
> >> >> >
> >> >> > Not necessarily only for drivers, we can also use it for implementing
> >> >> > other safe abstraction (e.g. hazard pointers, percpu counters etc)
> >> >> 
> >> >> That's fair, but then it should be `pub(crate)`.
> >> >> 
> >> >
> >> > Fine by me, but please see below.
> >> >
> >> >> >> do our usual due diligence and work out a safe abstraction. Only fall
> >> >> >> back to unsafe if it isn't possible.
> >> >> >> 
> >> >> >
> >> >> > All I'm saying is instead of figuring out a safe abstraction at first,
> >> >> > we should probably focus on identifying how to implement it and which
> >> >> > part is really unsafe and the safety requirement for that.
> >> >> 
> >> >> Yeah. But then we should do that before merging :)
> >> >> 
> >> >
> >> > Well, who's talknig about merging? ;-) I thought we just began reviewing
> >> > here ;-)
> >> 
> >> I understand [PATCH] emails as "I want to merge this" and [RFC PATCH] as
> >
> > But it doesn't mean "merge as it is", right? I don't think either I or
> > Mitchell implied that, I'm surprised that you had to mention that,
> 
> Yeah that is true, but it at least shows the intention :)
> 
> > also based on "I often mute those" below, making it "[PATCH]" seems to
> > be a practical way to get more attention if one wants to get some
> > reviews.
> 
> That is true, I do usually read the titles of RFC patches though and
> sometimes take a look eg your atomics series.
> 
> >> "I want to talk about merging this". It might be that I haven't seen the
> >> RFC patch series, because I often mute those.
> >> 
> >
> > Well, then you cannot blame people to move from "RFC PATCH" to "PATCH"
> > stage for more reviews, right? And you cannot make rules about what the
> > difference between [PATCH] and [RFC PATCH] if you ignore one of them ;-)
> 
> I'm not trying to blame anyone. I saw a lot of unsafe in the example and
> thought "we can do better" and since I haven't heard any sufficient
> arguments showing that it's impossible to improve, we should do some
> design work.
> 

I agree with you, and I like what you're proposing, but I think design
work can be done at "PATCH" stage, right? And sometimes, it's also OK to
do some design work even at some version like "v12" ;-)

Also I want to see more forward-progress actions about the design work
improvement. For example, we can examine every case that makes
unsafe_get_per_cpu!() unsafe, and see if we can improve that by typing
or something else. We always can "do better", but the important part is
how to get there ;-)

> >> >> >> I'm not familiar with percpu, but from the name I assumed that it's
> >> >> >> "just a variable for each cpu" so similar to `thread_local!`, but it's
> >> >> >> bound to the specific cpu instead of the thread.
> >> >> >> 
> >> >> >> That in my mind should be rather easy to support in Rust at least with
> >> >> >> the thread_local-style API. You just need to ensure that no reference
> >> >> >> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
> >> >> >
> >> >> > Not really, in kernel, we have plenty of use cases that we read the
> >> >> > other CPU's percpu variables. For example, each CPU keeps it's own
> >> >> > counter and we sum them other in another CPU.
> >> >> 
> >> >> But then you need some sort of synchronization?
> >> >> 
> >> >
> >> > Right, but the synchronization can exist either in the percpu operations
> >> > themselves or outside the percpu operations. Some cases, the data types
> >> > are small enough to fit in atomic data types, and operations are just
> >> > load/store/cmpxchg etc, then operations on the current cpu and remote
> >> > read will be naturally synchronized. Sometimes extra synchronization is
> >> > needed.
> >> 
> >> Sure, so we probably want direct atomics support. What about "extra
> >> synchronization"? Is that using locks or RCU or what else?
> >> 
> >
> > It's up to the users obviously, It could be some sort of locking or RCU,
> > it's case by case.
> 
> Makes sense, what do you need in the VMS driver?
> 

In VMBus driver, it's actually isolate, i.e. each CPU only access it's
own work_struct, so synchronization between CPUs is not needed.

Regards,
Boqun

> >> > Keyword find all these cases are `per_cpu_ptr()`:
> >> >
> >> > 	https://elixir.bootlin.com/linux/v6.15.6/A/ident/per_cpu_ptr
> >> 
> >> Could you explain to me how to find them? I can either click on one of
> >> the files with horrible C preprocessor macros or the auto-completion in
> >> the search bar. But that one only shows 3 suggestions `_hyp_sym`,
> >> `_nvhe_sym` and `_to_phys` which doesn't really mean much to me.
> >> 
> >
> > You need to find the usage of `per_cpu_ptr()`, which is a function that
> > gives you a pointer to a percpu variable on the other CPU, and then
> > that's usually the case where a "remote" read of percpu variable
> > happens.
> 
> Ahh gotcha, I thought you pointed me to some definitions of operations
> on percpu pointers.
> 
> ---
> Cheers,
> Benno

Re: [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test

Posted by Benno Lossin 2 months, 3 weeks ago

On Wed Jul 16, 2025 at 7:52 PM CEST, Boqun Feng wrote:
> On Wed, Jul 16, 2025 at 07:21:32PM +0200, Benno Lossin wrote:
>> On Wed Jul 16, 2025 at 5:33 PM CEST, Boqun Feng wrote:
>> > On Wed, Jul 16, 2025 at 12:32:04PM +0200, Benno Lossin wrote:
>> >> On Tue Jul 15, 2025 at 11:34 PM CEST, Boqun Feng wrote:
>> >> > On Tue, Jul 15, 2025 at 07:44:01PM +0200, Benno Lossin wrote:
>> >> > [...]
>> >> >> >> >
>> >> >> >> > First of all, `thread_local!` has to be implemented by some sys-specific
>> >> >> >> > unsafe mechanism, right? For example on unix, I think it's using
>> >> >> >> > pthread_key_t:
>> >> >> >> >
>> >> >> >> > 	https://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_key_create.html
>> >> >> >> >
>> >> >> >> > what we are implementing (or wrapping) is the very basic unsafe
>> >> >> >> > mechanism for percpu here. Surely we can explore the design for a safe
>> >> >> >> > API, but the unsafe mechanism is probably necessary to look into at
>> >> >> >> > first.
>> >> >> >> 
>> >> >> >> But this is intended to be used by drivers, right? If so, then we should
>> >> >> >
>> >> >> > Not necessarily only for drivers, we can also use it for implementing
>> >> >> > other safe abstraction (e.g. hazard pointers, percpu counters etc)
>> >> >> 
>> >> >> That's fair, but then it should be `pub(crate)`.
>> >> >> 
>> >> >
>> >> > Fine by me, but please see below.
>> >> >
>> >> >> >> do our usual due diligence and work out a safe abstraction. Only fall
>> >> >> >> back to unsafe if it isn't possible.
>> >> >> >> 
>> >> >> >
>> >> >> > All I'm saying is instead of figuring out a safe abstraction at first,
>> >> >> > we should probably focus on identifying how to implement it and which
>> >> >> > part is really unsafe and the safety requirement for that.
>> >> >> 
>> >> >> Yeah. But then we should do that before merging :)
>> >> >> 
>> >> >
>> >> > Well, who's talknig about merging? ;-) I thought we just began reviewing
>> >> > here ;-)
>> >> 
>> >> I understand [PATCH] emails as "I want to merge this" and [RFC PATCH] as
>> >
>> > But it doesn't mean "merge as it is", right? I don't think either I or
>> > Mitchell implied that, I'm surprised that you had to mention that,
>> 
>> Yeah that is true, but it at least shows the intention :)
>> 
>> > also based on "I often mute those" below, making it "[PATCH]" seems to
>> > be a practical way to get more attention if one wants to get some
>> > reviews.
>> 
>> That is true, I do usually read the titles of RFC patches though and
>> sometimes take a look eg your atomics series.
>> 
>> >> "I want to talk about merging this". It might be that I haven't seen the
>> >> RFC patch series, because I often mute those.
>> >> 
>> >
>> > Well, then you cannot blame people to move from "RFC PATCH" to "PATCH"
>> > stage for more reviews, right? And you cannot make rules about what the
>> > difference between [PATCH] and [RFC PATCH] if you ignore one of them ;-)
>> 
>> I'm not trying to blame anyone. I saw a lot of unsafe in the example and
>> thought "we can do better" and since I haven't heard any sufficient
>> arguments showing that it's impossible to improve, we should do some
>> design work.
>> 
>
> I agree with you, and I like what you're proposing, but I think design
> work can be done at "PATCH" stage, right? And sometimes, it's also OK to
> do some design work even at some version like "v12" ;-)

Yeah of course. The thing is just that nobody asked why there was unsafe
and thus I got the impression that people thought this would be a good
abstraction for percpu. (don't take from this that it's bad :)

> Also I want to see more forward-progress actions about the design work
> improvement. For example, we can examine every case that makes
> unsafe_get_per_cpu!() unsafe, and see if we can improve that by typing
> or something else. We always can "do better", but the important part is
> how to get there ;-)

Yeah that would be a starting point :)

>> >> >> >> I'm not familiar with percpu, but from the name I assumed that it's
>> >> >> >> "just a variable for each cpu" so similar to `thread_local!`, but it's
>> >> >> >> bound to the specific cpu instead of the thread.
>> >> >> >> 
>> >> >> >> That in my mind should be rather easy to support in Rust at least with
>> >> >> >> the thread_local-style API. You just need to ensure that no reference
>> >> >> >> can escape the cpu, so we can make it `!Send` & `!Sync` + rely on klint
>> >> >> >
>> >> >> > Not really, in kernel, we have plenty of use cases that we read the
>> >> >> > other CPU's percpu variables. For example, each CPU keeps it's own
>> >> >> > counter and we sum them other in another CPU.
>> >> >> 
>> >> >> But then you need some sort of synchronization?
>> >> >> 
>> >> >
>> >> > Right, but the synchronization can exist either in the percpu operations
>> >> > themselves or outside the percpu operations. Some cases, the data types
>> >> > are small enough to fit in atomic data types, and operations are just
>> >> > load/store/cmpxchg etc, then operations on the current cpu and remote
>> >> > read will be naturally synchronized. Sometimes extra synchronization is
>> >> > needed.
>> >> 
>> >> Sure, so we probably want direct atomics support. What about "extra
>> >> synchronization"? Is that using locks or RCU or what else?
>> >> 
>> >
>> > It's up to the users obviously, It could be some sort of locking or RCU,
>> > it's case by case.
>> 
>> Makes sense, what do you need in the VMS driver?
>> 
>
> In VMBus driver, it's actually isolate, i.e. each CPU only access it's
> own work_struct, so synchronization between CPUs is not needed.

I see, so we could either just start out with no sync support or --
which I would prefer -- get a list of the most common use-cases and
implement those too (or at least design the first part compatibly with
further extensions).

---
Cheers,
Benno

[PATCH v2 1/5] rust: percpu: introduce a rust API for per-CPU variables
[PATCH v2 2/5] rust: rust-analyzer: add lib to dirs searched for crates
[PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test
[PATCH v2 4/5] rust: percpu: Add pin-hole optimizations for numerics
[PATCH v2 5/5] rust: percpu: cache per-CPU pointers in the dynamic case