[PATCH v5 8/8] rust: percpu: cache per-CPU pointers in the dynamic case

Mitchell Levy posted 8 patches 5 hours ago
[PATCH v5 8/8] rust: percpu: cache per-CPU pointers in the dynamic case
Posted by Mitchell Levy 5 hours ago
Currently, the creation of a `PerCpuNumeric` requires a memory read via
the `Arc` managing the dynamic allocation. While the compiler might be
clever enough to consolidate these reads in some cases, the read must
happen *somewhere*, which, when we're concerning ourselves with
individual instructions, is a very high burden.

Instead, cache the `PerCpuPtr` inside the `DynamicPerCpu` structure;
then, the `Arc` is used solely to manage the allocation.

We might as well also use this speed-up in the standard `get` and
`get_mut` accessors that give a `{Checked,}PerCpuToken`.

Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
---
 rust/kernel/percpu/dynamic.rs | 34 ++++++++++++++++++----------------
 rust/kernel/percpu/numeric.rs |  4 ++--
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/rust/kernel/percpu/dynamic.rs b/rust/kernel/percpu/dynamic.rs
index a717138b93dc..675210098ed8 100644
--- a/rust/kernel/percpu/dynamic.rs
+++ b/rust/kernel/percpu/dynamic.rs
@@ -101,6 +101,9 @@ pub struct DynamicPerCpu<T> {
     // INVARIANT: The memory location in each CPU's per-CPU area pointed at by the alloc is
     // initialized.
     alloc: Option<Arc<PerCpuAllocation<T>>>,
+    // INVARIANT: `ptr` is the per-CPU pointer managed by `alloc`, which does not change for the
+    // lifetime of `self`.
+    pub(super) ptr: PerCpuPtr<T>,
 }
 
 impl<T: Zeroable> DynamicPerCpu<T> {
@@ -112,9 +115,13 @@ impl<T: Zeroable> DynamicPerCpu<T> {
     pub fn new_zero(flags: Flags) -> Option<Self> {
         let alloc: PerCpuAllocation<T> = PerCpuAllocation::new_zero()?;
 
+        let ptr = alloc.0;
         let arc = Arc::new(alloc, flags).ok()?;
 
-        Some(Self { alloc: Some(arc) })
+        Some(Self {
+            alloc: Some(arc),
+            ptr,
+        })
     }
 }
 
@@ -158,15 +165,10 @@ pub fn new_from(mut initer: impl FnMut(CpuId) -> T, flags: Flags) -> Option<Self
             }
         }
 
-        Some(Self { alloc: Some(arc) })
-    }
-}
-
-impl<T> DynamicPerCpu<T> {
-    /// Gets the allocation backing this per-CPU variable.
-    pub(crate) fn alloc(&self) -> &Arc<PerCpuAllocation<T>> {
-        // SAFETY: This type's invariant ensures that `self.alloc` is `Some`.
-        unsafe { self.alloc.as_ref().unwrap_unchecked() }
+        Some(Self {
+            alloc: Some(arc),
+            ptr,
+        })
     }
 }
 
@@ -178,11 +180,11 @@ unsafe fn get_mut(&mut self, guard: CpuGuard) -> PerCpuToken<'_, T> {
         //    exists on the current CPU.
         // 3. The invariants of `DynamicPerCpu` ensure that the contents of the allocation are
         //    initialized on each CPU.
-        // 4. The existence of a reference to the `PerCpuAllocation` ensures that the allocation is
-        //    live.
+        // 4. `&mut self` holds a reference to the `PerCpuAllocation`, so the allocation it manages
+        //    is live.
         // 5. The invariants of `DynamicPerCpu` ensure that the allocation is sized and aligned for
         //    a `T`.
-        unsafe { PerCpuToken::new(guard, &self.alloc.as_ref().unwrap_unchecked().0) }
+        unsafe { PerCpuToken::new(guard, &self.ptr) }
     }
 }
 
@@ -192,11 +194,11 @@ fn get(&self, guard: CpuGuard) -> CheckedPerCpuToken<'_, T> {
         // 1. Invariants of this type assure that `alloc` is `Some`.
         // 2. The invariants of `DynamicPerCpu` ensure that the contents of the allocation are
         //    initialized on each CPU.
-        // 3. The existence of a reference to the `PerCpuAllocation` ensures that the allocation is
-        //    live.
+        // 3. `&self` holds a reference to the `PerCpuAllocation`, so the allocation it manages
+        //    is live.
         // 4. The invariants of `DynamicPerCpu` ensure that the allocation is sized and aligned for
         //    a `T`.
-        unsafe { CheckedPerCpuToken::new(guard, &self.alloc.as_ref().unwrap_unchecked().0) }
+        unsafe { CheckedPerCpuToken::new(guard, &self.ptr) }
     }
 }
 
diff --git a/rust/kernel/percpu/numeric.rs b/rust/kernel/percpu/numeric.rs
index 13b4ab4a794d..4841843b05f7 100644
--- a/rust/kernel/percpu/numeric.rs
+++ b/rust/kernel/percpu/numeric.rs
@@ -22,7 +22,7 @@ impl DynamicPerCpu<$ty> {
             pub fn num(&mut self) -> PerCpuNumeric<'_, $ty> {
                 // The invariant is satisfied because `DynamicPerCpu`'s invariant guarantees that
                 // this pointer is valid and initialized on all CPUs.
-                PerCpuNumeric { ptr: &self.alloc().0 }
+                PerCpuNumeric { ptr: &self.ptr }
             }
         }
         impl StaticPerCpu<$ty> {
@@ -78,7 +78,7 @@ impl DynamicPerCpu<$ty> {
             pub fn num(&mut self) -> PerCpuNumeric<'_, $ty> {
                 // The invariant is satisfied because `DynamicPerCpu`'s invariant guarantees that
                 // this pointer is valid and initialized on all CPUs.
-                PerCpuNumeric { ptr: &self.alloc().0 }
+                PerCpuNumeric { ptr: &self.ptr }
             }
         }
         impl StaticPerCpu<$ty> {

-- 
2.34.1