As its name suggests, simple hazard pointers (shazptr) is a
simplification of hazard pointers [1]: it has only one hazard pointer
slot per CPU and is targeted at simple use cases where the read side
already has preemption disabled. It trades the full feature set of a
normal hazard pointer implementation (multiple slots, dynamic slot
allocation, etc.) for simplicity in that scenario.
Since there's only one slot per CPU, shazptr read-side critical
section nesting is a problem that needs to be resolved, because at the
very least, interrupts and NMIs can introduce nested shazptr read-side
critical sections. A SHAZPTR_WILDCARD is introduced to resolve this:
SHAZPTR_WILDCARD is a special address value that blocks *all* shazptr
waiters. In the interrupt-caused nesting case (i.e. an interrupt
handler tries to acquire a hazard pointer while the per-CPU hazard
pointer slot is already in use), the inner critical section switches
the value of the hazard pointer slot to SHAZPTR_WILDCARD and lets the
outer critical section eventually zero the slot. SHAZPTR_WILDCARD
still provides the correct protection because it blocks all the
waiters.
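
For example, nested acquires on the same CPU behave as follows (a
sketch; 'a' and 'b' stand for arbitrary protected addresses):

	g1 = shazptr_acquire(a);	/* slot = a, g1.use_wildcard == false */
	g2 = shazptr_acquire(b);	/* slot busy, so slot = SHAZPTR_WILDCARD */
	shazptr_clear(g2);		/* no-op: the inner guard used the wildcard */
	shazptr_clear(g1);		/* the outermost guard zeroes the slot */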
It's true that once the wildcard mechanism is activated, the shazptr
mechanism may degrade to something similar to RCU (and probably with a
worse implementation), which generally has longer wait times and a
larger memory footprint than a typical hazard pointer implementation.
However, that can only happen with a lot of hazard pointer users, at
which point it's reasonable to introduce the fully-featured hazard
pointer implementation [2] and switch users to it.
Note that shazptr_protect() may be added later; the current potential
usage doesn't require it, and shazptr_acquire(), which installs the
protected value into the hazard pointer slot and provides the smp_mb(),
is enough for now.
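
For illustration, the expected usage looks roughly like this (a sketch;
'obj' is a hypothetical object that the updater first makes unreachable
to new readers):

	/* Reader, with preemption already disabled: */
	/* Assumes obj cannot be freed before shazptr_acquire() returns. */
	struct shazptr_guard g = shazptr_acquire(obj);
	/* obj is now safe to access until shazptr_clear(). */
	shazptr_clear(g);

	/* Updater, after unpublishing obj: */
	synchronize_shazptr(obj);	/* wait for all guards protecting obj */
	kfree(obj);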
[1]: M. M. Michael, "Hazard pointers: safe memory reclamation for
lock-free objects," in IEEE Transactions on Parallel and
Distributed Systems, vol. 15, no. 6, pp. 491-504, June 2004
Link: https://lore.kernel.org/lkml/20240917143402.930114-1-boqun.feng@gmail.com/ [2]
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
---
include/linux/shazptr.h | 73 ++++++++++++++++++++++++++++++++++++++++
kernel/locking/Makefile | 2 +-
kernel/locking/shazptr.c | 29 ++++++++++++++++
3 files changed, 103 insertions(+), 1 deletion(-)
create mode 100644 include/linux/shazptr.h
create mode 100644 kernel/locking/shazptr.c
diff --git a/include/linux/shazptr.h b/include/linux/shazptr.h
new file mode 100644
index 000000000000..287cd04b4be9
--- /dev/null
+++ b/include/linux/shazptr.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Simple hazard pointers
+ *
+ * Copyright (c) 2025, Microsoft Corporation.
+ *
+ * Author: Boqun Feng <boqun.feng@gmail.com>
+ *
+ * A simple variant of hazard pointers: users must ensure that preemption is
+ * already disabled when calling shazptr_acquire() to protect an address. If
+ * one shazptr_acquire() is called while an earlier shazptr_acquire() on the
+ * same CPU has not yet been paired with its shazptr_clear(), the later guard
+ * must be cleared first (i.e. guards must nest properly).
+ *
+ * The most suitable usage is when only one address needs to be protected in
+ * a preemption-disabled critical section.
+ */
+
+#ifndef _LINUX_SHAZPTR_H
+#define _LINUX_SHAZPTR_H
+
+#include <linux/cleanup.h>
+#include <linux/percpu.h>
+
+/* Make ULONG_MAX the wildcard value */
+#define SHAZPTR_WILDCARD ((void *)(ULONG_MAX))
+
+DECLARE_PER_CPU_SHARED_ALIGNED(void *, shazptr_slots);
+
+/* Represent a held hazard pointer slot */
+struct shazptr_guard {
+ void **slot;
+ bool use_wildcard;
+};
+
+/*
+ * Acquire a hazptr slot and begin the hazard pointer critical section.
+ *
+ * Must be called with preemption disabled, and preemption must remain disabled
+ * until shazptr_clear().
+ */
+static inline struct shazptr_guard shazptr_acquire(void *ptr)
+{
+ struct shazptr_guard guard = {
+ /* Preemption is disabled. */
+ .slot = this_cpu_ptr(&shazptr_slots),
+ .use_wildcard = false,
+ };
+
+ if (likely(!READ_ONCE(*guard.slot))) {
+ WRITE_ONCE(*guard.slot, ptr);
+ } else {
+ guard.use_wildcard = true;
+ WRITE_ONCE(*guard.slot, SHAZPTR_WILDCARD);
+ }
+
+ smp_mb(); /* Synchronize with smp_mb() at synchronize_shazptr(). */
+
+ return guard;
+}
+
+static inline void shazptr_clear(struct shazptr_guard guard)
+{
+ /* Only clear the slot when the outermost guard is released */
+ if (likely(!guard.use_wildcard))
+ smp_store_release(guard.slot, NULL); /* Pair with ACQUIRE at synchronize_shazptr() */
+}
+
+void synchronize_shazptr(void *ptr);
+
+DEFINE_CLASS(shazptr, struct shazptr_guard, shazptr_clear(_T),
+ shazptr_acquire(ptr), void *ptr);
+#endif
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index a114949eeed5..1517076c98ec 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -3,7 +3,7 @@
# and is generally not a function of system call inputs.
KCOV_INSTRUMENT := n
-obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
+obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o shazptr.o
# Avoid recursion lockdep -> sanitizer -> ... -> lockdep & improve performance.
KASAN_SANITIZE_lockdep.o := n
diff --git a/kernel/locking/shazptr.c b/kernel/locking/shazptr.c
new file mode 100644
index 000000000000..991fd1a05cfd
--- /dev/null
+++ b/kernel/locking/shazptr.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Simple hazard pointers
+ *
+ * Copyright (c) 2025, Microsoft Corporation.
+ *
+ * Author: Boqun Feng <boqun.feng@gmail.com>
+ */
+
+#include <linux/atomic.h>
+#include <linux/cpumask.h>
+#include <linux/shazptr.h>
+
+DEFINE_PER_CPU_SHARED_ALIGNED(void *, shazptr_slots);
+EXPORT_PER_CPU_SYMBOL_GPL(shazptr_slots);
+
+void synchronize_shazptr(void *ptr)
+{
+ int cpu;
+
+ smp_mb(); /* Synchronize with the smp_mb() in shazptr_acquire(). */
+ for_each_possible_cpu(cpu) {
+ void **slot = per_cpu_ptr(&shazptr_slots, cpu);
+ /* Pair with smp_store_release() in shazptr_clear(). */
+ smp_cond_load_acquire(slot,
+ VAL != ptr && VAL != SHAZPTR_WILDCARD);
+ }
+}
+EXPORT_SYMBOL_GPL(synchronize_shazptr);
--
2.39.5 (Apple Git-154)
On 6/24/25 11:10 PM, Boqun Feng wrote:
[...]
> +	if (likely(!READ_ONCE(*guard.slot))) {
> +		WRITE_ONCE(*guard.slot, ptr);
> +	} else {
> +		guard.use_wildcard = true;
> +		WRITE_ONCE(*guard.slot, SHAZPTR_WILDCARD);
> +	}

Is it correct to assume that shazptr cannot be used in a mixed-context
environment on the same CPU, like a task context and an interrupt context
trying to acquire it simultaneously, because the current check isn't
atomic with respect to that?

> +static inline void shazptr_clear(struct shazptr_guard guard)
> +{
> +	/* Only clear the slot when the outermost guard is released */
> +	if (likely(!guard.use_wildcard))
> +		smp_store_release(guard.slot, NULL); /* Pair with ACQUIRE at synchronize_shazptr() */
> +}

Is it better to name it shazptr_release() to be conformant with our
current locking convention?

Cheers,
Longman
On Wed, Jun 25, 2025 at 11:52:04AM -0400, Waiman Long wrote:
[...]
> Is it correct to assume that shazptr cannot be used in a mixed-context
> environment on the same CPU, like a task context and an interrupt context
> trying to acquire it simultaneously, because the current check isn't
> atomic with respect to that?

I think the current implementation actually supports mixed-context usage.
Let's see (assuming we start in a task context):

	if (likely(!READ_ONCE(*guard.slot))) {

If an interrupt happens here, it's fine because the slot is still empty,
as long as the interrupt eventually clears the slot.

		WRITE_ONCE(*guard.slot, ptr);

If an interrupt happens here, it's fine because the interrupt will notice
that the slot is already occupied, hence it will use the wildcard, and
because it uses the wildcard, it won't clear the slot after it returns.
However, the task context's shazptr_clear() will eventually clear the
slot because its guard's .use_wildcard is false.

	} else {

If an interrupt happens here, it's fine for the same reason: the
interrupt will use the wildcard, it will not clear the slot, and some
shazptr_clear() in the task context will eventually clear it.

		guard.use_wildcard = true;
		WRITE_ONCE(*guard.slot, SHAZPTR_WILDCARD);

If an interrupt happens here, it's fine for the same reason.

	}

It's similar to why rcu_read_lock() can be just a non-atomic inc.

> Is it better to name it shazptr_release() to be conformant with our
> current locking convention?

Maybe, but I will need to think about slot reuse between shazptr_acquire()
and shazptr_release(); in the general hazptr API, you can hazptr_alloc()
a slot, use it, hazptr_clear() it, use it again, and eventually
hazptr_free() it. I would like to keep both hazptr APIs consistent as
well. Thanks!

Regards,
Boqun
On 6/25/25 12:09 PM, Boqun Feng wrote:
> I think the current implementation actually supports mixed-context usage.
[...]
> It's similar to why rcu_read_lock() can be just a non-atomic inc.

You are right.

> Maybe, but I will need to think about slot reuse between shazptr_acquire()
> and shazptr_release(); in the general hazptr API, you can hazptr_alloc()
> a slot, use it, hazptr_clear() it, use it again, and eventually
> hazptr_free() it. I would like to keep both hazptr APIs consistent as
> well. Thanks!

Thanks for the explanation. Maybe we can reuse the general hazptr API
names (alloc/clear/free) even though shazptr_free() will be a no-op for
now. Just that the current acquire/clear naming looks odd to me.

Thanks,
Longman
On 2025-06-24 23:10, Boqun Feng wrote:
[...]
> +static inline void shazptr_clear(struct shazptr_guard guard)
> +{
> +	/* Only clear the slot when the outermost guard is released */
> +	if (likely(!guard.use_wildcard))
> +		smp_store_release(guard.slot, NULL); /* Pair with ACQUIRE at synchronize_shazptr() */
> +}

How is the wildcard ever cleared?

Thanks,

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Wed, Jun 25, 2025 at 10:25:23AM -0400, Mathieu Desnoyers wrote:
> How is the wildcard ever cleared?

The outermost shazptr_guard will have .use_wildcard being false, so it
will clear the wildcard. E.g.:

	g1 = shazptr_acquire(a);	// g1.use_wildcard is false;
					// this CPU's hazptr slot is 'a'.

	g2 = shazptr_acquire(b);	// g2.use_wildcard is true;
					// this CPU's hazptr slot becomes
					// SHAZPTR_WILDCARD.

	shazptr_clear(g2);		// does nothing.

	shazptr_clear(g1);		// clears this CPU's hazptr slot
					// to NULL.

Regards,
Boqun
On Tue, Jun 24, 2025 at 08:10:54PM -0700, Boqun Feng wrote:
> A SHAZPTR_WILDCARD is introduced to resolve this: SHAZPTR_WILDCARD is a
> special address value that blocks *all* shazptr waiters.

Don't we typically name such a thing a tombstone?

> + * The most suitable usage is when only one address needs to be protected in
> + * a preemption-disabled critical section.

It might be useful to have some example code included here to illustrate
how this is supposed to be used etc.

> +/* Make ULONG_MAX the wildcard value */
> +#define SHAZPTR_WILDCARD ((void *)(ULONG_MAX))

Right, I typically write that like: ((void *)-1L) or ((void *)~0UL)

> +/* Represent a held hazard pointer slot */
> +struct shazptr_guard {
> +	void **slot;
> +	bool use_wildcard;
> +};

Natural alignment ensures the LSB of that pointer is 0, which is enough
space to stick that bool in, no? (See the sketch after this message.)

> +	struct shazptr_guard guard = {
> +		/* Preemption is disabled. */
> +		.slot = this_cpu_ptr(&shazptr_slots),

What you're trying to say with that comment is that this_cpu_ptr() will
complain if preemption is not already disabled, and as such this verifies
the assumption?

You can also add:

	lockdep_assert_preemption_disabled();

at the start of this function and then all these comments can go in the
bin, no?

> +void synchronize_shazptr(void *ptr)
> +{
> +	int cpu;

	lockdep_assert_preemption_enabled();

> +
> +	smp_mb(); /* Synchronize with the smp_mb() in shazptr_acquire(). */
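
Following up on the LSB suggestion above, a minimal sketch of packing
use_wildcard into bit 0 of the guard's slot pointer could look like this
(hypothetical and untested; not part of the posted patch):

	/* Hypothetical: fold use_wildcard into bit 0 of the slot address. */
	struct shazptr_guard {
		unsigned long slot_and_flag;
	};

	static inline struct shazptr_guard shazptr_acquire(void *ptr)
	{
		void **slot = this_cpu_ptr(&shazptr_slots);
		unsigned long flag = 0;

		if (likely(!READ_ONCE(*slot))) {
			WRITE_ONCE(*slot, ptr);
		} else {
			/* Slot busy: fall back to the wildcard. */
			flag = 1;
			WRITE_ONCE(*slot, SHAZPTR_WILDCARD);
		}

		smp_mb(); /* Synchronize with smp_mb() in synchronize_shazptr(). */

		return (struct shazptr_guard){
			.slot_and_flag = (unsigned long)slot | flag,
		};
	}

	static inline void shazptr_clear(struct shazptr_guard guard)
	{
		/* Only a non-wildcard (outermost) guard clears the slot. */
		if (likely(!(guard.slot_and_flag & 1UL)))
			smp_store_release((void **)guard.slot_and_flag, NULL);
	}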