Move the percpu_up_read() slowpath out of the inline function into a new
__percpu_up_read_slowpath() to avoid binary size increase from adding a
tracepoint to an inlined function.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
include/linux/percpu-rwsem.h | 15 +++------------
kernel/locking/percpu-rwsem.c | 18 ++++++++++++++++++
2 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index c8cb010d655e..89506895365c 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
return ret;
}
+void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem);
+
static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
@@ -118,18 +120,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
if (likely(rcu_sync_is_idle(&sem->rss))) {
this_cpu_dec(*sem->read_count);
} else {
- /*
- * slowpath; reader will only ever wake a single blocked
- * writer.
- */
- smp_mb(); /* B matches C */
- /*
- * In other words, if they see our decrement (presumably to
- * aggregate zero, as that is the only time it matters) they
- * will also see our critical section.
- */
- this_cpu_dec(*sem->read_count);
- rcuwait_wake_up(&sem->writer);
+ __percpu_up_read_slowpath(sem);
}
preempt_enable();
}
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index ef234469baac..4190635458da 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -288,3 +288,21 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
+
+void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem)
+{
+ lockdep_assert_preemption_disabled();
+ /*
+ * slowpath; reader will only ever wake a single blocked
+ * writer.
+ */
+ smp_mb(); /* B matches C */
+ /*
+ * In other words, if they see our decrement (presumably to
+ * aggregate zero, as that is the only time it matters) they
+ * will also see our critical section.
+ */
+ this_cpu_dec(*sem->read_count);
+ rcuwait_wake_up(&sem->writer);
+}
+EXPORT_SYMBOL_GPL(__percpu_up_read_slowpath);
--
2.47.3
On Wed, Mar 04, 2026 at 04:56:16PM +0000, Dmitry Ilvokhin wrote:
> Move the percpu_up_read() slowpath out of the inline function into a new
> __percpu_up_read_slowpath() to avoid binary size increase from adding a
> tracepoint to an inlined function.
>
> Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
> ---
> include/linux/percpu-rwsem.h | 15 +++------------
> kernel/locking/percpu-rwsem.c | 18 ++++++++++++++++++
> 2 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
> index c8cb010d655e..89506895365c 100644
> --- a/include/linux/percpu-rwsem.h
> +++ b/include/linux/percpu-rwsem.h
> @@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
> return ret;
> }
>
> +void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem);
> +
extern for consistency with all the other declarations in this header.
s/_slowpath//, the corresponding down function also doesn't have
_slowpath on.
> static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
> {
> rwsem_release(&sem->dep_map, _RET_IP_);
On Wed, Mar 04, 2026 at 11:02:23PM +0100, Peter Zijlstra wrote: > On Wed, Mar 04, 2026 at 04:56:16PM +0000, Dmitry Ilvokhin wrote: > > Move the percpu_up_read() slowpath out of the inline function into a new > > __percpu_up_read_slowpath() to avoid binary size increase from adding a > > tracepoint to an inlined function. > > > > Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com> > > --- > > include/linux/percpu-rwsem.h | 15 +++------------ > > kernel/locking/percpu-rwsem.c | 18 ++++++++++++++++++ > > 2 files changed, 21 insertions(+), 12 deletions(-) > > > > diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h > > index c8cb010d655e..89506895365c 100644 > > --- a/include/linux/percpu-rwsem.h > > +++ b/include/linux/percpu-rwsem.h > > @@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) > > return ret; > > } > > > > +void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem); > > + > > extern for consistency with all the other declarations in this header. > > s/_slowpath//, the corresponding down function also doesn't have > _slowpath on. Thanks for the feedback, Peter. Applied both suggestions locally.
On Wed, 4 Mar 2026 23:02:23 +0100
Peter Zijlstra <peterz@infradead.org> wrote:
> > diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
> > index c8cb010d655e..89506895365c 100644
> > --- a/include/linux/percpu-rwsem.h
> > +++ b/include/linux/percpu-rwsem.h
> > @@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
> > return ret;
> > }
> >
> > +void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem);
> > +
>
> extern for consistency with all the other declarations in this header.
I wonder if a cleanup patch should be added to remove the "extern" from the
other functions, as that tends to be the way things are going (hch just
recommended it elsewhere).
>
> s/_slowpath//, the corresponding down function also doesn't have
> _slowpath on.
>
> > static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
> > {
> > rwsem_release(&sem->dep_map, _RET_IP_);
And since "slowpath" is more descriptive (and used in the rtmutex code),
should that be added too?
-- Steve
On Thu, Mar 05, 2026 at 10:47:03AM -0500, Steven Rostedt wrote:
> On Wed, 4 Mar 2026 23:02:23 +0100
> Peter Zijlstra <peterz@infradead.org> wrote:
>
> > > diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
> > > index c8cb010d655e..89506895365c 100644
> > > --- a/include/linux/percpu-rwsem.h
> > > +++ b/include/linux/percpu-rwsem.h
> > > @@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
> > > return ret;
> > > }
> > >
> > > +void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem);
> > > +
> >
> > extern for consistency with all the other declarations in this header.
>
> I wonder if a cleanup patch should be added to remove the "extern" from the
> other functions, as that tends to be the way things are going (hch just
> recommended it elsewhere).
Well, I rather like the extern. But yeah, I know hch does not agree.
> >
> > s/_slowpath//, the corresponding down function also doesn't have
> > _slowpath on.
> >
> > > static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
> > > {
> > > rwsem_release(&sem->dep_map, _RET_IP_);
>
> And since "slowpath" is more descriptive (and used in the rtmutex code),
> should that be added too?
It already has __ prefix, no point in making the name even longer for no
real benefit.
© 2016 - 2026 Red Hat, Inc.