[PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu

Mitchell Levy posted 7 patches 1 month ago
Posted by Mitchell Levy 1 month ago
Add functionality to `PerCpuPtr` to compute pointers to per-CPU variable
slots on other CPUs. Use this facility to initialize per-CPU variables
on all possible CPUs when a dynamic per-CPU variable is created with a
non-zeroable type. Since `RefCell` and other `Cell`-like types fall into
this category, `impl CheckedPerCpu` on `DynamicPerCpu` for these
`InteriorMutable` types since they can now be used. Add examples of
these usages to `samples/rust/rust_percpu.rs`.

Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
---
 rust/helpers/percpu.c         |  5 +++
 rust/kernel/percpu.rs         | 15 +++++++
 rust/kernel/percpu/dynamic.rs | 40 +++++++++++++++++
 samples/rust/rust_percpu.rs   | 99 ++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 152 insertions(+), 7 deletions(-)

diff --git a/rust/helpers/percpu.c b/rust/helpers/percpu.c
index 8cc01d094752..8d83b6b86106 100644
--- a/rust/helpers/percpu.c
+++ b/rust/helpers/percpu.c
@@ -8,6 +8,11 @@ void __percpu *rust_helper_alloc_percpu(size_t sz, size_t align)
 	return __alloc_percpu(sz, align);
 }
 
+void *rust_helper_per_cpu_ptr(void __percpu *ptr, unsigned int cpu)
+{
+	return per_cpu_ptr(ptr, cpu);
+}
+
 void rust_helper_on_each_cpu(smp_call_func_t func, void *info, int wait)
 {
 	on_each_cpu(func, info, wait);
diff --git a/rust/kernel/percpu.rs b/rust/kernel/percpu.rs
index 35afcdba3ccd..c68c7520b67f 100644
--- a/rust/kernel/percpu.rs
+++ b/rust/kernel/percpu.rs
@@ -14,6 +14,7 @@
 use bindings::{alloc_percpu, free_percpu};
 
 use crate::alloc::Flags;
+use crate::cpu::CpuId;
 use crate::percpu::cpu_guard::CpuGuard;
 use crate::prelude::*;
 use crate::sync::Arc;
@@ -115,6 +116,20 @@ pub fn get_ptr(&self) -> *mut MaybeUninit<T> {
         // the invariant that self.0 is a valid offset into the per-CPU area.
         (this_cpu_area).wrapping_add(self.0 as usize).cast()
     }
+
+    /// Get a `*mut MaybeUninit<T>` to the per-CPU variable on the CPU represented by `cpu`. Note
+    /// that without some kind of synchronization, use of the returned pointer may cause a data
+    /// race. It is the caller's responsibility to use the returned pointer in a reasonable way.
+    ///
+    /// # Safety
+    /// - The returned pointer is valid only if `self` is (that is, it points to a live allocation
+    ///   correctly sized and aligned to hold a `T`)
+    /// - The returned pointer is valid only if the bit corresponding to `cpu` is set in
+    ///   `Cpumask::possible()`.
+    pub unsafe fn get_remote_ptr(&self, cpu: CpuId) -> *mut MaybeUninit<T> {
+        // SAFETY: The requirements of this function ensure this call is safe.
+        unsafe { bindings::per_cpu_ptr(self.0.cast(), cpu.as_u32()) }.cast()
+    }
 }
 
 // SAFETY: Sending a `PerCpuPtr<T>` to another thread is safe because as soon as it's sent, the
diff --git a/rust/kernel/percpu/dynamic.rs b/rust/kernel/percpu/dynamic.rs
index ce95e420f943..64f04cef3705 100644
--- a/rust/kernel/percpu/dynamic.rs
+++ b/rust/kernel/percpu/dynamic.rs
@@ -3,6 +3,8 @@
 
 use super::*;
 
+use crate::cpumask::Cpumask;
+
 /// Represents a dynamic allocation of a per-CPU variable via alloc_percpu. Calls free_percpu when
 /// dropped.
 pub struct PerCpuAllocation<T>(PerCpuPtr<T>);
@@ -74,6 +76,36 @@ pub fn new_zero(flags: Flags) -> Option<Self> {
     }
 }
 
+impl<T: Clone> DynamicPerCpu<T> {
+    /// Allocates a new per-CPU variable
+    ///
+    /// # Arguments
+    /// * `val` - The initial value of the per-CPU variable on all CPUs.
+    /// * `flags` - Flags used to allocate an `Arc` that keeps track of the underlying
+    ///   `PerCpuAllocation`.
+    pub fn new_with(val: T, flags: Flags) -> Option<Self> {
+        let alloc: PerCpuAllocation<T> = PerCpuAllocation::new_uninit()?;
+        let ptr = alloc.0;
+
+        for cpu in Cpumask::possible().iter() {
+            // SAFETY: `ptr` is a valid allocation, and `cpu` appears in `Cpumask::possible()`
+            let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
+            // SAFETY: Each CPU's slot corresponding to `ptr` is currently uninitialized, and no
+            // one else has a reference to it. Therefore, we can freely write to it without
+            // worrying about the need to drop what was there or whether we're racing with someone
+            // else. `PerCpuPtr::get_remote_ptr` guarantees that the pointer is valid since we
+            // derived it from a valid allocation and `cpu`.
+            unsafe {
+                (*remote_ptr).write(val.clone());
+            }
+        }
+
+        let arc = Arc::new(alloc, flags).ok()?;
+
+        Some(Self { alloc: arc })
+    }
+}
+
 impl<T> PerCpu<T> for DynamicPerCpu<T> {
     unsafe fn get_mut(&mut self, guard: CpuGuard) -> PerCpuToken<'_, T> {
         // SAFETY: The requirements of `PerCpu::get_mut` and this type's invariant ensure that the
@@ -81,3 +113,11 @@ unsafe fn get_mut(&mut self, guard: CpuGuard) -> PerCpuToken<'_, T> {
         unsafe { PerCpuToken::new(guard, &self.alloc.0) }
     }
 }
+
+impl<T: InteriorMutable> CheckedPerCpu<T> for DynamicPerCpu<T> {
+    fn get(&mut self, guard: CpuGuard) -> CheckedPerCpuToken<'_, T> {
+        // SAFETY: By the invariant of `DynamicPerCpu`, the memory location in each CPU's
+        // per-CPU area corresponding to this variable has been initialized.
+        unsafe { CheckedPerCpuToken::new(guard, &self.alloc.0) }
+    }
+}
diff --git a/samples/rust/rust_percpu.rs b/samples/rust/rust_percpu.rs
index 98ca1c781b6b..06b322019134 100644
--- a/samples/rust/rust_percpu.rs
+++ b/samples/rust/rust_percpu.rs
@@ -130,13 +130,72 @@ fn init(_module: &'static ThisModule) -> Result<Self, Error> {
 
         // SAFETY: No prerequisites for on_each_cpu.
         unsafe {
-            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 0);
-            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 0);
-            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 0);
-            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 1);
-            on_each_cpu(Some(check_percpu), (&raw mut test).cast(), 1);
+            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 0);
+            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 0);
+            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 0);
+            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 1);
+            on_each_cpu(Some(check_percpu_u64), (&raw mut test).cast(), 1);
         }
 
+        let mut checked: DynamicPerCpu<RefCell<u64>> =
+            DynamicPerCpu::new_with(RefCell::new(100), GFP_KERNEL).unwrap();
+
+        // SAFETY: No prerequisites for on_each_cpu.
+        unsafe {
+            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 0);
+            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 0);
+            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 0);
+            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 1);
+            on_each_cpu(Some(check_percpu_refcell_u64), (&raw mut checked).cast(), 1);
+        }
+
+        checked.get(CpuGuard::new()).with(|val: &RefCell<u64>| {
+            assert!(*val.borrow() == 104);
+
+            let mut checked_native = 0;
+            *val.borrow_mut() = 0;
+
+            checked_native += 1;
+            *val.borrow_mut() += 1;
+            pr_info!(
+                "Checked native: {}, *checked: {}\n",
+                checked_native,
+                val.borrow()
+            );
+            assert!(checked_native == *val.borrow() && checked_native == 1);
+
+            checked_native = checked_native.wrapping_add((-1i64) as u64);
+            val.replace_with(|old: &mut u64| old.wrapping_add((-1i64) as u64));
+            pr_info!(
+                "Checked native: {}, *checked: {}\n",
+                checked_native,
+                val.borrow()
+            );
+            assert!(checked_native == *val.borrow() && checked_native == 0);
+
+            checked_native = checked_native.wrapping_add((-1i64) as u64);
+            val.replace_with(|old: &mut u64| old.wrapping_add((-1i64) as u64));
+            pr_info!(
+                "Checked native: {}, *checked: {}\n",
+                checked_native,
+                val.borrow()
+            );
+            assert!(checked_native == *val.borrow() && checked_native == (-1i64) as u64);
+
+            checked_native = 0;
+            *val.borrow_mut() = 0;
+
+            checked_native = checked_native.wrapping_sub(1);
+            val.replace_with(|old: &mut u64| old.wrapping_sub(1));
+            pr_info!(
+                "Checked native: {}, *checked: {}\n",
+                checked_native,
+                val.borrow()
+            );
+            assert!(checked_native == *val.borrow() && checked_native == (-1i64) as u64);
+            assert!(checked_native == *val.borrow() && checked_native == u64::MAX);
+        });
+
         pr_info!("rust dynamic percpu test done\n");
 
         // Return Err to unload the module
@@ -144,7 +203,7 @@ fn init(_module: &'static ThisModule) -> Result<Self, Error> {
     }
 }
 
-extern "C" fn inc_percpu(info: *mut c_void) {
+extern "C" fn inc_percpu_u64(info: *mut c_void) {
     // SAFETY: We know that info is a void *const DynamicPerCpu<u64> and DynamicPerCpu<u64> is Send.
     let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<u64>)).clone() };
     pr_info!("Incrementing on {}\n", CpuId::current().as_u32());
@@ -153,7 +212,7 @@ extern "C" fn inc_percpu(info: *mut c_void) {
     unsafe { pcpu.get_mut(CpuGuard::new()) }.with(|val: &mut u64| *val += 1);
 }
 
-extern "C" fn check_percpu(info: *mut c_void) {
+extern "C" fn check_percpu_u64(info: *mut c_void) {
     // SAFETY: We know that info is a void *const DynamicPerCpu<u64> and DynamicPerCpu<u64> is Send.
     let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<u64>)).clone() };
     pr_info!("Asserting on {}\n", CpuId::current().as_u32());
@@ -161,3 +220,29 @@ extern "C" fn check_percpu(info: *mut c_void) {
     // SAFETY: We don't have multiple clones of pcpu in scope
     unsafe { pcpu.get_mut(CpuGuard::new()) }.with(|val: &mut u64| assert!(*val == 4));
 }
+
+extern "C" fn inc_percpu_refcell_u64(info: *mut c_void) {
+    // SAFETY: We know that info is a void *const DynamicPerCpu<RefCell<u64>> and
+    // DynamicPerCpu<RefCell<u64>> is Send.
+    let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<RefCell<u64>>)).clone() };
+    // SAFETY: smp_processor_id has no preconditions
+    pr_info!("Incrementing on {}\n", CpuId::current().as_u32());
+
+    pcpu.get(CpuGuard::new()).with(|val: &RefCell<u64>| {
+        let mut val = val.borrow_mut();
+        *val += 1;
+    });
+}
+
+extern "C" fn check_percpu_refcell_u64(info: *mut c_void) {
+    // SAFETY: We know that info is a void *const DynamicPerCpu<RefCell<u64>> and
+    // DynamicPerCpu<RefCell<u64>> is Send.
+    let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<RefCell<u64>>)).clone() };
+    // SAFETY: smp_processor_id has no preconditions
+    pr_info!("Asserting on {}\n", CpuId::current().as_u32());
+
+    pcpu.get(CpuGuard::new()).with(|val: &RefCell<u64>| {
+        let val = val.borrow();
+        assert!(*val == 104);
+    });
+}

-- 
2.34.1
Re: [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu
Posted by Miguel Ojeda 4 weeks, 1 day ago
On Thu, Aug 28, 2025 at 9:01 PM Mitchell Levy <levymitchell0@gmail.com> wrote:
>
> +    /// Get a `*mut MaybeUninit<T>` to the per-CPU variable on the CPU represented by `cpu`. Note
> +    /// that without some kind of synchronization, use of the returned pointer may cause a data
> +    /// race. It is the caller's responsibility to use the returned pointer in a reasonable way.

Please try to make the first paragraph ("short description" / title) smaller.

Does "reasonable" mean anything different than any other raw pointer?

> +    /// # Safety

Newline after section headers (also elsewhere).

> +    /// - The returned pointer is valid only if `self` is (that is, it points to a live allocation
> +    ///   correctly sized and aligned to hold a `T`)
> +    /// - The returned pointer is valid only if the bit corresponding to `cpu` is set in
> +    ///   `Cpumask::possible()`.

It sounds like the returned pointer can be invalid without triggering
UB -- could you please clarify why the method is `unsafe`?

In addition, please use intra-doc links wherever possible (e.g. there
was also an `Arc` elsewhere).

> +        // SAFETY: The requirements of this function ensure this call is safe.
> +        unsafe { bindings::per_cpu_ptr(self.0.cast(), cpu.as_u32()) }.cast()

Please try to explain why, not just that it is. It isn't clear how the
safety preconditions, which only seem to talk about the returned
pointer, make this OK.

Thanks!

Cheers,
Miguel
Re: [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu
Posted by Mitchell Levy 4 weeks ago
On Thu, Sep 04, 2025 at 01:05:36AM +0200, Miguel Ojeda wrote:
> On Thu, Aug 28, 2025 at 9:01 PM Mitchell Levy <levymitchell0@gmail.com> wrote:
> >
> > +    /// Get a `*mut MaybeUninit<T>` to the per-CPU variable on the CPU represented by `cpu`. Note
> > +    /// that without some kind of synchronization, use of the returned pointer may cause a data
> > +    /// race. It is the caller's responsibility to use the returned pointer in a reasonable way.
> 
> Please try to make the first paragraph ("short description" / title) smaller.

Will do.

> Does "reasonable" mean anything different than any other raw pointer?

This will depend very heavily on `T`; there's a detailed discussion
here: https://docs.kernel.org/core-api/this_cpu_ops.html#remote-access-to-per-cpu-data

In general, remote accesses are strongly discouraged, and my intention
was mostly just wanting to reflect that in this documentation.
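
To give a concrete flavour of what "reasonable" can mean (a sketch only,
not something this series adds): when the slot type carries its own
synchronization, e.g. `T = AtomicU64` from `core::sync::atomic`, a remote
reader can sum the per-CPU counters, assuming every slot was initialized
beforehand (say via `new_with`). Here `ptr` is a `PerCpuPtr<AtomicU64>`
and `Ordering` is `core::sync::atomic::Ordering`:

        let mut total = 0u64;
        for cpu in Cpumask::possible().iter() {
            // Illustrative only: sound because every possible CPU's slot
            // was initialized before the allocation was shared, and the
            // atomic load tolerates concurrent updates by the owning CPU.
            let remote = unsafe { ptr.get_remote_ptr(cpu) };
            let counter = unsafe { (*remote).assume_init_ref() };
            total += counter.load(Ordering::Relaxed);
        }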

> > +    /// # Safety
> 
> Newline after section headers (also elsewhere).

Will do.

> > +    /// - The returned pointer is valid only if `self` is (that is, it points to a live allocation
> > +    ///   correctly sized and aligned to hold a `T`)
> > +    /// - The returned pointer is valid only if the bit corresponding to `cpu` is set in
> > +    ///   `Cpumask::possible()`.
> 
> It sounds like the returned pointer can be invalid without triggering
> UB -- could you please clarify why the method is `unsafe`?

Yes, this is true; strictly speaking, calling this function without
dereferencing the returned pointer is always safe. I suppose I find it
clearer that, when a function has preconditions that are necessary for
it to return a valid pointer, the "safe-ness" has more to do with the
function's preconditions than the act of dereferencing the pointer.

In this case, the pointer shouldn't be going very far, but I think this
logic applies especially well in cases where pointers might be getting
stored away for later (and so the validity of the dereference later on
might rely on non-local conditions).

All that said, I'm alright with having this be a safe function, with the
documentation noting these requirements for the returned pointer to be
valid.

> In addition, please use intra-doc links wherever possible (e.g. there
> was also an `Arc` elsewhere).

Will do.

> > +        // SAFETY: The requirements of this function ensure this call is safe.
> > +        unsafe { bindings::per_cpu_ptr(self.0.cast(), cpu.as_u32()) }.cast()
> 
> Please try to explain why, not just that it is. It isn't clear how the
> safety preconditions, which only seem to talk about the returned
> pointer, make this OK.

This flows from the first requirement (that `self` is a live allocation,
which is necessary for `per_cpu_ptr` to return a valid pointer). Though,
as above, simply possessing this pointer isn't UB, so it's arguable that
any call to `per_cpu_ptr` (on x86 at least, I'm not sure how it's
implemented on other arches) is always safe. Regardless, I agree this
comment should be more clear (even if the function is safe, it's
probably worth noting why the pointer returned is valid when the
function preconditions are met); will fix.
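
Roughly, the v4 shape might end up like this (a sketch only; the doc
wording and the comment are illustrative, not final):

        /// Returns a `*mut MaybeUninit<T>` to this variable's slot on `cpu`.
        ///
        /// The returned pointer is only valid if `self` points to a live
        /// allocation correctly sized and aligned for a `T`, and if `cpu` is
        /// set in `Cpumask::possible()`. Using it without synchronization may
        /// race with the owning CPU.
        pub fn get_remote_ptr(&self, cpu: CpuId) -> *mut MaybeUninit<T> {
            // SAFETY: By the type invariant, `self.0` is a valid offset into
            // the per-CPU area, and `per_cpu_ptr` only performs address
            // arithmetic on it; nothing is dereferenced here.
            unsafe { bindings::per_cpu_ptr(self.0.cast(), cpu.as_u32()) }.cast()
        }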

Thanks,
Mitchell

> Thanks!
> 
> Cheers,
> Miguel
Re: [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu
Posted by Miguel Ojeda 4 weeks ago
On Thu, Sep 4, 2025 at 10:17 PM Mitchell Levy <levymitchell0@gmail.com> wrote:
>
> Will do.

By the way, sorry -- just to clarify, I didn't mean to remove
anything, but rather split it (likely at the first full stop).

> Yes, this is true; strictly speaking, calling this function without
> dereferencing the returned pointer is always safe. I suppose I find it
> clearer that, when a function has preconditions that are necessary for
> it to return a valid pointer, the "safe-ness" has more to do with the
> function's preconditions than the act of dereferencing the pointer.

In that case, I would make it safe -- we don't use safety
preconditions to mark "dangerous" operations, so to speak.

> In this case, the pointer shouldn't be going very far, but I think this
> logic applies especially well in cases where pointers might be getting
> stored away for later (and so the validity of the dereference later on
> might rely on non-local conditions).

But that applies to any returned raw pointer, no?

> This flows from the first requirement (that `self` is a live allocation,
> which is necessary for `per_cpu_ptr` to return a valid pointer). Though,
> as above, simply possessing this pointer isn't UB, so it's arguable that
> any call to `per_cpu_ptr` (on x86 at least, I'm not sure how it's
> implemented on other arches) is always safe. Regardless, I agree this
> comment should be more clear (even if the function is safe, it's
> probably worth noting why the pointer returned is valid when the
> function preconditions are met); will fix.

Sounds good, thanks!

If you have cases you think other architectures may have different
requirements, and you think it is likely one could enable the support
for other arches and break it, then I would suggest adding a comment
about it.

Or, ideally, try to see if other architectures are fine already, even
if you still don't enable the code in other arches.

Cheers,
Miguel
Re: [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu
Posted by Mitchell Levy 4 weeks ago
On Thu, Sep 04, 2025 at 10:37:48PM +0200, Miguel Ojeda wrote:
> On Thu, Sep 4, 2025 at 10:17 PM Mitchell Levy <levymitchell0@gmail.com> wrote:
> >
> > Will do.
> 
> By the way, sorry -- just to clarify, I didn't mean to remove
> anything, but rather split it (likely at the first full stop).

Oh no worries, I gotcha

> > Yes, this is true; strictly speaking, calling this function without
> > dereferencing the returned pointer is always safe. I suppose I find i
> t> clearer that, when a function has preconditions that are necessary for
> > it to return a valid pointer, the "safe-ness" has more to do with the
> > functions preconditions than the act of dereferencing the pointer.
> 
> In that case, I would make it safe -- we don't use safety
> preconditions to mark "dangerous" operations, so to speak.

Understood.

> > In this case, the pointer shouldn't be going very far, but I think this
> > logic applies especially well in cases where pointers might be getting
> > stored away for later (and so the validity of the dereference later on
> > might rely on non-local conditions).
> 
> But that applies to any returned raw pointer, no?

This is a fair point :)

> > This flows from the first requirement (that `self` is a live allocation,
> > which is necessary for `per_cpu_ptr` to return a valid pointer). Though,
> > as above, simply possessing this pointer isn't UB, so it's arguable that
> > any call to `per_cpu_ptr` (on x86 at least, I'm not sure how it's
> > implemented on other arches) is always safe. Regardless, I agree this
> > comment should be more clear (even if the function is safe, it's
> > probably worth noting why the pointer returned is valid when the
> > function preconditions are met); will fix.
> 
> Sounds good, thanks!
> 
> If you have cases you think other architectures may have different
> requirements, and you think it is likely one could enable the support
> for other arches and break it, then I would suggest adding a comment
> about it.
>
> Or, ideally, try to see if other architectures are fine already, even
> if you still don't enable the code in other arches.

Yeah agreed, I'd like to get ARM64 support going soon-ish at the very
least. My hope is that I should only need to mess with `percpu::numeric`
and `PerCpuPtr::get_ptr`... usage of `per_cpu_ptr` *shouldn't* have any
prerequisites, but there's just this note:
https://elixir.bootlin.com/linux/v6.17-rc4/source/include/asm-generic/percpu.h#L29
so I don't want to make strong claims :)

(note, I think the comment about x86_64 might be out-of-date, see
https://elixir.bootlin.com/linux/v6.17-rc4/source/arch/x86/kernel/setup_percpu.c#L32
and
https://elixir.bootlin.com/linux/v6.17-rc4/source/arch/x86/kernel/setup_percpu.c#L165
)

Thanks,
Mitchell

> Cheers,
> Miguel
Re: [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu
Posted by Yury Norov 4 weeks, 1 day ago
On Thu, Aug 28, 2025 at 12:00:12PM -0700, Mitchell Levy wrote:
> Add functionality to `PerCpuPtr` to compute pointers to per-CPU variable
> slots on other CPUs. Use this facility to initialize per-CPU variables
> on all possible CPUs when a dynamic per-CPU variable is created with a
> non-zeroable type. Since `RefCell` and other `Cell`-like types fall into
> this category, `impl CheckedPerCpu` on `DynamicPerCpu` for these
> `InteriorMutable` types since they can now be used. Add examples of
> these usages to `samples/rust/rust_percpu.rs`.
> 
> Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
> ---
>  rust/helpers/percpu.c         |  5 +++
>  rust/kernel/percpu.rs         | 15 +++++++
>  rust/kernel/percpu/dynamic.rs | 40 +++++++++++++++++
>  samples/rust/rust_percpu.rs   | 99 ++++++++++++++++++++++++++++++++++++++++---
>  4 files changed, 152 insertions(+), 7 deletions(-)
> 
> diff --git a/rust/helpers/percpu.c b/rust/helpers/percpu.c
> index 8cc01d094752..8d83b6b86106 100644
> --- a/rust/helpers/percpu.c
> +++ b/rust/helpers/percpu.c
> @@ -8,6 +8,11 @@ void __percpu *rust_helper_alloc_percpu(size_t sz, size_t align)
>  	return __alloc_percpu(sz, align);
>  }
>  
> +void *rust_helper_per_cpu_ptr(void __percpu *ptr, unsigned int cpu)
> +{
> +	return per_cpu_ptr(ptr, cpu);
> +}
> +
>  void rust_helper_on_each_cpu(smp_call_func_t func, void *info, int wait)
>  {
>  	on_each_cpu(func, info, wait);
> diff --git a/rust/kernel/percpu.rs b/rust/kernel/percpu.rs
> index 35afcdba3ccd..c68c7520b67f 100644
> --- a/rust/kernel/percpu.rs
> +++ b/rust/kernel/percpu.rs
> @@ -14,6 +14,7 @@
>  use bindings::{alloc_percpu, free_percpu};
>  
>  use crate::alloc::Flags;
> +use crate::cpu::CpuId;
>  use crate::percpu::cpu_guard::CpuGuard;
>  use crate::prelude::*;
>  use crate::sync::Arc;
> @@ -115,6 +116,20 @@ pub fn get_ptr(&self) -> *mut MaybeUninit<T> {
>          // the invariant that self.0 is a valid offset into the per-CPU area.
>          (this_cpu_area).wrapping_add(self.0 as usize).cast()
>      }
> +
> +    /// Get a `*mut MaybeUninit<T>` to the per-CPU variable on the CPU represented by `cpu`. Note
> +    /// that without some kind of synchronization, use of the returned pointer may cause a data
> +    /// race. It is the caller's responsibility to use the returned pointer in a reasonable way.
> +    ///
> +    /// # Safety
> +    /// - The returned pointer is valid only if `self` is (that is, it points to a live allocation
> +    ///   correctly sized and aligned to hold a `T`)
> +    /// - The returned pointer is valid only if the bit corresponding to `cpu` is set in
> +    ///   `Cpumask::possible()`.

Instead of explaining those rules in comments, can you just enforce
them in code? Not sure about the 1st rule, but the 2nd one looks like
a trivial check.

> +    pub unsafe fn get_remote_ptr(&self, cpu: CpuId) -> *mut MaybeUninit<T> {
> +        // SAFETY: The requirements of this function ensure this call is safe.
> +        unsafe { bindings::per_cpu_ptr(self.0.cast(), cpu.as_u32()) }.cast()
> +    }
>  }
>  
>  // SAFETY: Sending a `PerCpuPtr<T>` to another thread is safe because as soon as it's sent, the
> diff --git a/rust/kernel/percpu/dynamic.rs b/rust/kernel/percpu/dynamic.rs
> index ce95e420f943..64f04cef3705 100644
> --- a/rust/kernel/percpu/dynamic.rs
> +++ b/rust/kernel/percpu/dynamic.rs
> @@ -3,6 +3,8 @@
>  
>  use super::*;
>  
> +use crate::cpumask::Cpumask;
> +
>  /// Represents a dynamic allocation of a per-CPU variable via alloc_percpu. Calls free_percpu when
>  /// dropped.
>  pub struct PerCpuAllocation<T>(PerCpuPtr<T>);
> @@ -74,6 +76,36 @@ pub fn new_zero(flags: Flags) -> Option<Self> {
>      }
>  }
>  
> +impl<T: Clone> DynamicPerCpu<T> {
> +    /// Allocates a new per-CPU variable
> +    ///
> +    /// # Arguments
> +    /// * `val` - The initial value of the per-CPU variable on all CPUs.
> +    /// * `flags` - Flags used to allocate an `Arc` that keeps track of the underlying
> +    ///   `PerCpuAllocation`.
> +    pub fn new_with(val: T, flags: Flags) -> Option<Self> {
> +        let alloc: PerCpuAllocation<T> = PerCpuAllocation::new_uninit()?;
> +        let ptr = alloc.0;
> +
> +        for cpu in Cpumask::possible().iter() {

In C we've got the 'for_each_possible_cpu()'. Is there any way to
preserve that semantics in rust? I really believe that similar
semantics on higher level on both sides will help _a_lot_ for those
transitioning into the rust world (like me).

Thanks,
Yury
 
> +            // SAFETY: `ptr` is a valid allocation, and `cpu` appears in `Cpumask::possible()`
> +            let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
> +            // SAFETY: Each CPU's slot corresponding to `ptr` is currently uninitialized, and no
> +            // one else has a reference to it. Therefore, we can freely write to it without
> +            // worrying about the need to drop what was there or whether we're racing with someone
> +            // else. `PerCpuPtr::get_remote_ptr` guarantees that the pointer is valid since we
> +            // derived it from a valid allocation and `cpu`.
> +            unsafe {
> +                (*remote_ptr).write(val.clone());
> +            }
> +        }
> +
> +        let arc = Arc::new(alloc, flags).ok()?;
> +
> +        Some(Self { alloc: arc })
> +    }
> +}
> +
>  impl<T> PerCpu<T> for DynamicPerCpu<T> {
>      unsafe fn get_mut(&mut self, guard: CpuGuard) -> PerCpuToken<'_, T> {
>          // SAFETY: The requirements of `PerCpu::get_mut` and this type's invariant ensure that the
> @@ -81,3 +113,11 @@ unsafe fn get_mut(&mut self, guard: CpuGuard) -> PerCpuToken<'_, T> {
>          unsafe { PerCpuToken::new(guard, &self.alloc.0) }
>      }
>  }
> +
> +impl<T: InteriorMutable> CheckedPerCpu<T> for DynamicPerCpu<T> {
> +    fn get(&mut self, guard: CpuGuard) -> CheckedPerCpuToken<'_, T> {
> +        // SAFETY: By the invariant of `DynamicPerCpu`, the memory location in each CPU's
> +        // per-CPU area corresponding to this variable has been initialized.
> +        unsafe { CheckedPerCpuToken::new(guard, &self.alloc.0) }
> +    }
> +}
> diff --git a/samples/rust/rust_percpu.rs b/samples/rust/rust_percpu.rs
> index 98ca1c781b6b..06b322019134 100644
> --- a/samples/rust/rust_percpu.rs
> +++ b/samples/rust/rust_percpu.rs
> @@ -130,13 +130,72 @@ fn init(_module: &'static ThisModule) -> Result<Self, Error> {
>  
>          // SAFETY: No prerequisites for on_each_cpu.
>          unsafe {
> -            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 0);
> -            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 0);
> -            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 0);
> -            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 1);
> -            on_each_cpu(Some(check_percpu), (&raw mut test).cast(), 1);
> +            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 0);
> +            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 0);
> +            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 0);
> +            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 1);
> +            on_each_cpu(Some(check_percpu_u64), (&raw mut test).cast(), 1);
>          }
>  
> +        let mut checked: DynamicPerCpu<RefCell<u64>> =
> +            DynamicPerCpu::new_with(RefCell::new(100), GFP_KERNEL).unwrap();
> +
> +        // SAFETY: No prerequisites for on_each_cpu.
> +        unsafe {
> +            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 0);
> +            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 0);
> +            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 0);
> +            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 1);
> +            on_each_cpu(Some(check_percpu_refcell_u64), (&raw mut checked).cast(), 1);
> +        }
> +
> +        checked.get(CpuGuard::new()).with(|val: &RefCell<u64>| {
> +            assert!(*val.borrow() == 104);
> +
> +            let mut checked_native = 0;
> +            *val.borrow_mut() = 0;
> +
> +            checked_native += 1;
> +            *val.borrow_mut() += 1;
> +            pr_info!(
> +                "Checked native: {}, *checked: {}\n",
> +                checked_native,
> +                val.borrow()
> +            );
> +            assert!(checked_native == *val.borrow() && checked_native == 1);
> +
> +            checked_native = checked_native.wrapping_add((-1i64) as u64);
> +            val.replace_with(|old: &mut u64| old.wrapping_add((-1i64) as u64));
> +            pr_info!(
> +                "Checked native: {}, *checked: {}\n",
> +                checked_native,
> +                val.borrow()
> +            );
> +            assert!(checked_native == *val.borrow() && checked_native == 0);
> +
> +            checked_native = checked_native.wrapping_add((-1i64) as u64);
> +            val.replace_with(|old: &mut u64| old.wrapping_add((-1i64) as u64));
> +            pr_info!(
> +                "Checked native: {}, *checked: {}\n",
> +                checked_native,
> +                val.borrow()
> +            );
> +            assert!(checked_native == *val.borrow() && checked_native == (-1i64) as u64);
> +
> +            checked_native = 0;
> +            *val.borrow_mut() = 0;
> +
> +            checked_native = checked_native.wrapping_sub(1);
> +            val.replace_with(|old: &mut u64| old.wrapping_sub(1));
> +            pr_info!(
> +                "Checked native: {}, *checked: {}\n",
> +                checked_native,
> +                val.borrow()
> +            );
> +            assert!(checked_native == *val.borrow() && checked_native == (-1i64) as u64);
> +            assert!(checked_native == *val.borrow() && checked_native == u64::MAX);
> +        });
> +
>          pr_info!("rust dynamic percpu test done\n");
>  
>          // Return Err to unload the module
> @@ -144,7 +203,7 @@ fn init(_module: &'static ThisModule) -> Result<Self, Error> {
>      }
>  }
>  
> -extern "C" fn inc_percpu(info: *mut c_void) {
> +extern "C" fn inc_percpu_u64(info: *mut c_void) {
>      // SAFETY: We know that info is a void *const DynamicPerCpu<u64> and DynamicPerCpu<u64> is Send.
>      let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<u64>)).clone() };
>      pr_info!("Incrementing on {}\n", CpuId::current().as_u32());
> @@ -153,7 +212,7 @@ extern "C" fn inc_percpu(info: *mut c_void) {
>      unsafe { pcpu.get_mut(CpuGuard::new()) }.with(|val: &mut u64| *val += 1);
>  }
>  
> -extern "C" fn check_percpu(info: *mut c_void) {
> +extern "C" fn check_percpu_u64(info: *mut c_void) {
>      // SAFETY: We know that info is a void *const DynamicPerCpu<u64> and DynamicPerCpu<u64> is Send.
>      let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<u64>)).clone() };
>      pr_info!("Asserting on {}\n", CpuId::current().as_u32());
> @@ -161,3 +220,29 @@ extern "C" fn check_percpu(info: *mut c_void) {
>      // SAFETY: We don't have multiple clones of pcpu in scope
>      unsafe { pcpu.get_mut(CpuGuard::new()) }.with(|val: &mut u64| assert!(*val == 4));
>  }
> +
> +extern "C" fn inc_percpu_refcell_u64(info: *mut c_void) {
> +    // SAFETY: We know that info is a void *const DynamicPerCpu<RefCell<u64>> and
> +    // DynamicPerCpu<RefCell<u64>> is Send.
> +    let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<RefCell<u64>>)).clone() };
> +    // SAFETY: smp_processor_id has no preconditions
> +    pr_info!("Incrementing on {}\n", CpuId::current().as_u32());
> +
> +    pcpu.get(CpuGuard::new()).with(|val: &RefCell<u64>| {
> +        let mut val = val.borrow_mut();
> +        *val += 1;
> +    });
> +}
> +
> +extern "C" fn check_percpu_refcell_u64(info: *mut c_void) {
> +    // SAFETY: We know that info is a void *const DynamicPerCpu<RefCell<u64>> and
> +    // DynamicPerCpu<RefCell<u64>> is Send.
> +    let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<RefCell<u64>>)).clone() };
> +    // SAFETY: smp_processor_id has no preconditions
> +    pr_info!("Asserting on {}\n", CpuId::current().as_u32());
> +
> +    pcpu.get(CpuGuard::new()).with(|val: &RefCell<u64>| {
> +        let val = val.borrow();
> +        assert!(*val == 104);
> +    });
> +}
> 
> -- 
> 2.34.1
Re: [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu
Posted by Mitchell Levy 4 weeks ago
On Wed, Sep 03, 2025 at 06:19:25PM -0400, Yury Norov wrote:
> On Thu, Aug 28, 2025 at 12:00:12PM -0700, Mitchell Levy wrote:
> > Add functionality to `PerCpuPtr` to compute pointers to per-CPU variable
> > slots on other CPUs. Use this facility to initialize per-CPU variables
> > on all possible CPUs when a dynamic per-CPU variable is created with a
> > non-zeroable type. Since `RefCell` and other `Cell`-like types fall into
> > this category, `impl CheckedPerCpu` on `DynamicPerCpu` for these
> > `InteriorMutable` types since they can now be used. Add examples of
> > these usages to `samples/rust/rust_percpu.rs`.
> > 
> > Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
> > ---
> >  rust/helpers/percpu.c         |  5 +++
> >  rust/kernel/percpu.rs         | 15 +++++++
> >  rust/kernel/percpu/dynamic.rs | 40 +++++++++++++++++
> >  samples/rust/rust_percpu.rs   | 99 ++++++++++++++++++++++++++++++++++++++++---
> >  4 files changed, 152 insertions(+), 7 deletions(-)
> > 
> > diff --git a/rust/helpers/percpu.c b/rust/helpers/percpu.c
> > index 8cc01d094752..8d83b6b86106 100644
> > --- a/rust/helpers/percpu.c
> > +++ b/rust/helpers/percpu.c
> > @@ -8,6 +8,11 @@ void __percpu *rust_helper_alloc_percpu(size_t sz, size_t align)
> >  	return __alloc_percpu(sz, align);
> >  }
> >  
> > +void *rust_helper_per_cpu_ptr(void __percpu *ptr, unsigned int cpu)
> > +{
> > +	return per_cpu_ptr(ptr, cpu);
> > +}
> > +
> >  void rust_helper_on_each_cpu(smp_call_func_t func, void *info, int wait)
> >  {
> >  	on_each_cpu(func, info, wait);
> > diff --git a/rust/kernel/percpu.rs b/rust/kernel/percpu.rs
> > index 35afcdba3ccd..c68c7520b67f 100644
> > --- a/rust/kernel/percpu.rs
> > +++ b/rust/kernel/percpu.rs
> > @@ -14,6 +14,7 @@
> >  use bindings::{alloc_percpu, free_percpu};
> >  
> >  use crate::alloc::Flags;
> > +use crate::cpu::CpuId;
> >  use crate::percpu::cpu_guard::CpuGuard;
> >  use crate::prelude::*;
> >  use crate::sync::Arc;
> > @@ -115,6 +116,20 @@ pub fn get_ptr(&self) -> *mut MaybeUninit<T> {
> >          // the invariant that self.0 is a valid offset into the per-CPU area.
> >          (this_cpu_area).wrapping_add(self.0 as usize).cast()
> >      }
> > +
> > +    /// Get a `*mut MaybeUninit<T>` to the per-CPU variable on the CPU represented by `cpu`. Note
> > +    /// that without some kind of synchronization, use of the returned pointer may cause a data
> > +    /// race. It is the caller's responsibility to use the returned pointer in a reasonable way.
> > +    ///
> > +    /// # Safety
> > +    /// - The returned pointer is valid only if `self` is (that is, it points to a live allocation
> > +    ///   correctly sized and aligned to hold a `T`)
> > +    /// - The returned pointer is valid only if the bit corresponding to `cpu` is set in
> > +    ///   `Cpumask::possible()`.
> 
> Instead of explaining those rules in comments, can you just enforce
> them in code? Not sure about the 1st rule, but the 2nd one looks like
> a trivial check.

I don't think the first one is possible to check (at least, certainly
not without peeking deeply into the per-CPU allocation management
system).

For the second, I will probably be converting this function to a safe
function (see my reply to Miguel), with these being requirements for the
returned pointer to be valid, so panicking when the requirements aren't
met would probably not be expected behavior.
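
If some enforcement still seems worthwhile, one option would be a
debug-only sanity check built on the iterator that already exists (just a
sketch; it relies only on `Cpumask::possible()`, `iter()`, and
`CpuId::as_u32()` as already used elsewhere in this series):

        // Illustrative only: compiled out of release builds, so the safe
        // function's documented behavior stays panic-free there.
        debug_assert!(Cpumask::possible()
            .iter()
            .any(|c| c.as_u32() == cpu.as_u32()));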

> > +    pub unsafe fn get_remote_ptr(&self, cpu: CpuId) -> *mut MaybeUninit<T> {
> > +        // SAFETY: The requirements of this function ensure this call is safe.
> > +        unsafe { bindings::per_cpu_ptr(self.0.cast(), cpu.as_u32()) }.cast()
> > +    }
> >  }
> >  
> >  // SAFETY: Sending a `PerCpuPtr<T>` to another thread is safe because as soon as it's sent, the
> > diff --git a/rust/kernel/percpu/dynamic.rs b/rust/kernel/percpu/dynamic.rs
> > index ce95e420f943..64f04cef3705 100644
> > --- a/rust/kernel/percpu/dynamic.rs
> > +++ b/rust/kernel/percpu/dynamic.rs
> > @@ -3,6 +3,8 @@
> >  
> >  use super::*;
> >  
> > +use crate::cpumask::Cpumask;
> > +
> >  /// Represents a dynamic allocation of a per-CPU variable via alloc_percpu. Calls free_percpu when
> >  /// dropped.
> >  pub struct PerCpuAllocation<T>(PerCpuPtr<T>);
> > @@ -74,6 +76,36 @@ pub fn new_zero(flags: Flags) -> Option<Self> {
> >      }
> >  }
> >  
> > +impl<T: Clone> DynamicPerCpu<T> {
> > +    /// Allocates a new per-CPU variable
> > +    ///
> > +    /// # Arguments
> > +    /// * `val` - The initial value of the per-CPU variable on all CPUs.
> > +    /// * `flags` - Flags used to allocate an `Arc` that keeps track of the underlying
> > +    ///   `PerCpuAllocation`.
> > +    pub fn new_with(val: T, flags: Flags) -> Option<Self> {
> > +        let alloc: PerCpuAllocation<T> = PerCpuAllocation::new_uninit()?;
> > +        let ptr = alloc.0;
> > +
> > +        for cpu in Cpumask::possible().iter() {
> 
> In C we've got the 'for_each_possible_cpu()'. Is there any way to
> preserve that semantics in rust? I really believe that similar
> semantics on higher level on both sides will help _a_lot_ for those
> transitioning into the rust world (like me).

I'm not sure I understand what you mean --- I believe the semantics
should be the same here (`cpu` takes on each value in
`cpu_possible_mask`). Could you please clarify?

Thanks,
Mitchell

> Thanks,
> Yury
>  
> > +            // SAFETY: `ptr` is a valid allocation, and `cpu` appears in `Cpumask::possible()`
> > +            let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
> > +            // SAFETY: Each CPU's slot corresponding to `ptr` is currently uninitialized, and no
> > +            // one else has a reference to it. Therefore, we can freely write to it without
> > +            // worrying about the need to drop what was there or whether we're racing with someone
> > +            // else. `PerCpuPtr::get_remote_ptr` guarantees that the pointer is valid since we
> > +            // derived it from a valid allocation and `cpu`.
> > +            unsafe {
> > +                (*remote_ptr).write(val.clone());
> > +            }
> > +        }
> > +
> > +        let arc = Arc::new(alloc, flags).ok()?;
> > +
> > +        Some(Self { alloc: arc })
> > +    }
> > +}
> > +
> >  impl<T> PerCpu<T> for DynamicPerCpu<T> {
> >      unsafe fn get_mut(&mut self, guard: CpuGuard) -> PerCpuToken<'_, T> {
> >          // SAFETY: The requirements of `PerCpu::get_mut` and this type's invariant ensure that the
> > @@ -81,3 +113,11 @@ unsafe fn get_mut(&mut self, guard: CpuGuard) -> PerCpuToken<'_, T> {
> >          unsafe { PerCpuToken::new(guard, &self.alloc.0) }
> >      }
> >  }
> > +
> > +impl<T: InteriorMutable> CheckedPerCpu<T> for DynamicPerCpu<T> {
> > +    fn get(&mut self, guard: CpuGuard) -> CheckedPerCpuToken<'_, T> {
> > +        // SAFETY: By the invariant of `DynamicPerCpu`, the memory location in each CPU's
> > +        // per-CPU area corresponding to this variable has been initialized.
> > +        unsafe { CheckedPerCpuToken::new(guard, &self.alloc.0) }
> > +    }
> > +}
> > diff --git a/samples/rust/rust_percpu.rs b/samples/rust/rust_percpu.rs
> > index 98ca1c781b6b..06b322019134 100644
> > --- a/samples/rust/rust_percpu.rs
> > +++ b/samples/rust/rust_percpu.rs
> > @@ -130,13 +130,72 @@ fn init(_module: &'static ThisModule) -> Result<Self, Error> {
> >  
> >          // SAFETY: No prerequisites for on_each_cpu.
> >          unsafe {
> > -            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 0);
> > -            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 0);
> > -            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 0);
> > -            on_each_cpu(Some(inc_percpu), (&raw mut test).cast(), 1);
> > -            on_each_cpu(Some(check_percpu), (&raw mut test).cast(), 1);
> > +            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 0);
> > +            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 0);
> > +            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 0);
> > +            on_each_cpu(Some(inc_percpu_u64), (&raw mut test).cast(), 1);
> > +            on_each_cpu(Some(check_percpu_u64), (&raw mut test).cast(), 1);
> >          }
> >  
> > +        let mut checked: DynamicPerCpu<RefCell<u64>> =
> > +            DynamicPerCpu::new_with(RefCell::new(100), GFP_KERNEL).unwrap();
> > +
> > +        // SAFETY: No prerequisites for on_each_cpu.
> > +        unsafe {
> > +            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 0);
> > +            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 0);
> > +            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 0);
> > +            on_each_cpu(Some(inc_percpu_refcell_u64), (&raw mut checked).cast(), 1);
> > +            on_each_cpu(Some(check_percpu_refcell_u64), (&raw mut checked).cast(), 1);
> > +        }
> > +
> > +        checked.get(CpuGuard::new()).with(|val: &RefCell<u64>| {
> > +            assert!(*val.borrow() == 104);
> > +
> > +            let mut checked_native = 0;
> > +            *val.borrow_mut() = 0;
> > +
> > +            checked_native += 1;
> > +            *val.borrow_mut() += 1;
> > +            pr_info!(
> > +                "Checked native: {}, *checked: {}\n",
> > +                checked_native,
> > +                val.borrow()
> > +            );
> > +            assert!(checked_native == *val.borrow() && checked_native == 1);
> > +
> > +            checked_native = checked_native.wrapping_add((-1i64) as u64);
> > +            val.replace_with(|old: &mut u64| old.wrapping_add((-1i64) as u64));
> > +            pr_info!(
> > +                "Checked native: {}, *checked: {}\n",
> > +                checked_native,
> > +                val.borrow()
> > +            );
> > +            assert!(checked_native == *val.borrow() && checked_native == 0);
> > +
> > +            checked_native = checked_native.wrapping_add((-1i64) as u64);
> > +            val.replace_with(|old: &mut u64| old.wrapping_add((-1i64) as u64));
> > +            pr_info!(
> > +                "Checked native: {}, *checked: {}\n",
> > +                checked_native,
> > +                val.borrow()
> > +            );
> > +            assert!(checked_native == *val.borrow() && checked_native == (-1i64) as u64);
> > +
> > +            checked_native = 0;
> > +            *val.borrow_mut() = 0;
> > +
> > +            checked_native = checked_native.wrapping_sub(1);
> > +            val.replace_with(|old: &mut u64| old.wrapping_sub(1));
> > +            pr_info!(
> > +                "Checked native: {}, *checked: {}\n",
> > +                checked_native,
> > +                val.borrow()
> > +            );
> > +            assert!(checked_native == *val.borrow() && checked_native == (-1i64) as u64);
> > +            assert!(checked_native == *val.borrow() && checked_native == u64::MAX);
> > +        });
> > +
> >          pr_info!("rust dynamic percpu test done\n");
> >  
> >          // Return Err to unload the module
> > @@ -144,7 +203,7 @@ fn init(_module: &'static ThisModule) -> Result<Self, Error> {
> >      }
> >  }
> >  
> > -extern "C" fn inc_percpu(info: *mut c_void) {
> > +extern "C" fn inc_percpu_u64(info: *mut c_void) {
> >      // SAFETY: We know that info is a void *const DynamicPerCpu<u64> and DynamicPerCpu<u64> is Send.
> >      let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<u64>)).clone() };
> >      pr_info!("Incrementing on {}\n", CpuId::current().as_u32());
> > @@ -153,7 +212,7 @@ extern "C" fn inc_percpu(info: *mut c_void) {
> >      unsafe { pcpu.get_mut(CpuGuard::new()) }.with(|val: &mut u64| *val += 1);
> >  }
> >  
> > -extern "C" fn check_percpu(info: *mut c_void) {
> > +extern "C" fn check_percpu_u64(info: *mut c_void) {
> >      // SAFETY: We know that info is a void *const DynamicPerCpu<u64> and DynamicPerCpu<u64> is Send.
> >      let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<u64>)).clone() };
> >      pr_info!("Asserting on {}\n", CpuId::current().as_u32());
> > @@ -161,3 +220,29 @@ extern "C" fn check_percpu(info: *mut c_void) {
> >      // SAFETY: We don't have multiple clones of pcpu in scope
> >      unsafe { pcpu.get_mut(CpuGuard::new()) }.with(|val: &mut u64| assert!(*val == 4));
> >  }
> > +
> > +extern "C" fn inc_percpu_refcell_u64(info: *mut c_void) {
> > +    // SAFETY: We know that info is a void *const DynamicPerCpu<RefCell<u64>> and
> > +    // DynamicPerCpu<RefCell<u64>> is Send.
> > +    let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<RefCell<u64>>)).clone() };
> > +    // SAFETY: smp_processor_id has no preconditions
> > +    pr_info!("Incrementing on {}\n", CpuId::current().as_u32());
> > +
> > +    pcpu.get(CpuGuard::new()).with(|val: &RefCell<u64>| {
> > +        let mut val = val.borrow_mut();
> > +        *val += 1;
> > +    });
> > +}
> > +
> > +extern "C" fn check_percpu_refcell_u64(info: *mut c_void) {
> > +    // SAFETY: We know that info is a void *const DynamicPerCpu<RefCell<u64>> and
> > +    // DynamicPerCpu<RefCell<u64>> is Send.
> > +    let mut pcpu = unsafe { (*(info as *const DynamicPerCpu<RefCell<u64>>)).clone() };
> > +    // SAFETY: smp_processor_id has no preconditions
> > +    pr_info!("Asserting on {}\n", CpuId::current().as_u32());
> > +
> > +    pcpu.get(CpuGuard::new()).with(|val: &RefCell<u64>| {
> > +        let val = val.borrow();
> > +        assert!(*val == 104);
> > +    });
> > +}
> > 
> > -- 
> > 2.34.1
Re: [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu
Posted by Yury Norov 4 weeks ago
On Thu, Sep 04, 2025 at 01:26:07PM -0700, Mitchell Levy wrote:
> On Wed, Sep 03, 2025 at 06:19:25PM -0400, Yury Norov wrote:
> > On Thu, Aug 28, 2025 at 12:00:12PM -0700, Mitchell Levy wrote:

...

> > > +impl<T: Clone> DynamicPerCpu<T> {
> > > +    /// Allocates a new per-CPU variable
> > > +    ///
> > > +    /// # Arguments
> > > +    /// * `val` - The initial value of the per-CPU variable on all CPUs.
> > > +    /// * `flags` - Flags used to allocate an `Arc` that keeps track of the underlying
> > > +    ///   `PerCpuAllocation`.
> > > +    pub fn new_with(val: T, flags: Flags) -> Option<Self> {
> > > +        let alloc: PerCpuAllocation<T> = PerCpuAllocation::new_uninit()?;
> > > +        let ptr = alloc.0;
> > > +
> > > +        for cpu in Cpumask::possible().iter() {
> > 
> > In C we've got the 'for_each_possible_cpu()'. Is there any way to
> > preserve that semantics in rust? I really believe that similar
> > semantics on higher level on both sides will help _a_lot_ for those
> > transitioning into the rust world (like me).
> 
> I'm not sure I understand what you mean --- I believe the semantics
> should be the same here (`cpu` takes on each value in
> `cpu_possible_mask`). Could you please clarify?
> 

I mean:

        for_each_possible_cpu(cpu) {
                let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
                unsafe { (*remote_ptr).write(val.clone()); }
                let arc = Arc::new(alloc, flags).ok()?;
                Some(Self { alloc: arc })
        }

Is it possible to do the above in rust?
Re: [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu
Posted by Mitchell Levy 4 weeks ago
On Thu, Sep 04, 2025 at 04:37:02PM -0400, Yury Norov wrote:
> On Thu, Sep 04, 2025 at 01:26:07PM -0700, Mitchell Levy wrote:
> > On Wed, Sep 03, 2025 at 06:19:25PM -0400, Yury Norov wrote:
> > > On Thu, Aug 28, 2025 at 12:00:12PM -0700, Mitchell Levy wrote:
> 
> ...
> 
> > > > +impl<T: Clone> DynamicPerCpu<T> {
> > > > +    /// Allocates a new per-CPU variable
> > > > +    ///
> > > > +    /// # Arguments
> > > > +    /// * `val` - The initial value of the per-CPU variable on all CPUs.
> > > > +    /// * `flags` - Flags used to allocate an `Arc` that keeps track of the underlying
> > > > +    ///   `PerCpuAllocation`.
> > > > +    pub fn new_with(val: T, flags: Flags) -> Option<Self> {
> > > > +        let alloc: PerCpuAllocation<T> = PerCpuAllocation::new_uninit()?;
> > > > +        let ptr = alloc.0;
> > > > +
> > > > +        for cpu in Cpumask::possible().iter() {
> > > 
> > > In C we've got the 'for_each_possible_cpu()'. Is there any way to
> > > preserve that semantics in rust? I really believe that similar
> > > semantics on higher level on both sides will help _a_lot_ for those
> > > transitioning into the rust world (like me).
> > 
> > I'm not sure I understand what you mean --- I believe the semantics
> > should be the same here (`cpu` takes on each value in
> > `cpu_possible_mask`). Could you please clarify?
> > 
> 
> I mean:
> 
>         for_each_possible_cpu(cpu) {
>                 let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
>                 unsafe { (*remote_ptr).write(val.clone()); }
>                 let arc = Arc::new(alloc, flags).ok()?;
>                 Some(Self { alloc: arc })
>         }
> 
> Is it possible to do the above in rust?

Ah, I see.

The syntax would be slightly different, probably something like

        use cpu::for_each_possible_cpu;

        for_each_possible_cpu(|&cpu| {
                let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
                // ...
        })

it *might* also be possible to use a macro and dispense with the need for
a closure, though I'm not familiar enough with proc macros to say for
sure. That would probably look like

        for_each_possible_cpu!(cpu) {
                let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
                // ...
        }

though personally I think the first one is better (simpler
implementation without too much syntactic overhead, especially since
closures are already used some within R4L).
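
For completeness, the closure-taking helper itself would just be a thin
wrapper over the existing iterator (hypothetical; nothing like it exists
in the tree today, and whether the closure takes `CpuId` by value or by
reference is a detail):

        // Hypothetical helper, shown only to illustrate the shape; it is
        // not part of the kernel crate.
        pub fn for_each_possible_cpu(mut f: impl FnMut(CpuId)) {
            for cpu in Cpumask::possible().iter() {
                f(cpu);
            }
        }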

Thanks,
Mitchell
Re: [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu
Posted by Miguel Ojeda 4 weeks ago
On Thu, Sep 4, 2025 at 11:05 PM Mitchell Levy <levymitchell0@gmail.com> wrote:
>
> it *might* also be possible to use a macro and dispense with the need for
> a closure, though I'm not familiar enough with proc macros to say for
> sure. That would probably look like
>
>         for_each_possible_cpu!(cpu) {
>                 let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
>                 // ...
>         }
>
> though personally I think the first one is better (simpler
> implementation without too much syntactic overhead, especially since
> closures are already used some within R4L).

Yeah, please avoid macros as much as possible. Sometimes macros do
have advantages, but if it is just to avoid a closure, then no, please
avoid it.

Similarly, unless there is a concrete advantage needed with the
function, please avoid it too -- the original `for` with the iterator
is the normal way of doing it and already used plenty in the kernel.

Thanks!

Cheers,
Miguel
Re: [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu
Posted by Yury Norov 4 weeks ago
> > 
> >         for_each_possible_cpu(cpu) {
> >                 let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
> >                 unsafe { (*remote_ptr).write(val.clone()); }
> >                 let arc = Arc::new(alloc, flags).ok()?;
> >                 Some(Self { alloc: arc })
> >         }
> > 
> > Is it possible to do the above in rust?
> 
> Ah, I see.
> 
> The syntax would be slightly different, probably something like
> 
>         use cpu::for_each_possible_cpu;
> 
>         for_each_possible_cpu(|&cpu| {
>                 let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
>                 // ...
>         })
> 
> it *might* also be possible to use a macro and dispense with the need for
> a closure, though I'm not familiar enough with proc macros to say for
> sure. That would probably look like
> 
>         for_each_possible_cpu!(cpu) {
>                 let remote_ptr = unsafe { ptr.get_remote_ptr(cpu) };
>                 // ...
>         }
> 
> though personally I think the first one is better (simpler
> implementation without too much syntactic overhead, especially since
> closures are already used some within R4L).

Sure, #1 is OK if you prefer it.