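There are four recursion counters, one for each context. The type of the
counter is `int' but the counter is used as `bool' since it is only
incremented if zero.
Reduce the type of the recursion counter to an unsigned char, keep the
increment/decrement operation.
The goal is not to shrink the generated code but to allow the counters to
be added to task_struct without making it larger, by filling a hole.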
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/events/callchain.c | 2 +-
kernel/events/core.c | 2 +-
kernel/events/internal.h | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 1273be84392cf..ad57944b6c40e 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -29,7 +29,7 @@ static inline size_t perf_callchain_entry__sizeof(void)
sysctl_perf_event_max_contexts_per_stack));
}
-static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
+static DEFINE_PER_CPU(u8, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
static struct callchain_cpus_entries *callchain_cpus_entries;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6256a9593c3da..f48ce05907042 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9781,7 +9781,7 @@ struct swevent_htable {
int hlist_refcount;
/* Recursion avoidance in each contexts */
- int recursion[PERF_NR_CONTEXTS];
+ u8 recursion[PERF_NR_CONTEXTS];
};
static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 5150d5f84c033..f9a3244206b20 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -208,7 +208,7 @@ arch_perf_out_copy_user(void *dst, const void *src, unsigned long n)
DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
-static inline int get_recursion_context(int *recursion)
+static inline int get_recursion_context(u8 *recursion)
{
unsigned char rctx = interrupt_context_level();
@@ -221,7 +221,7 @@ static inline int get_recursion_context(int *recursion)
return rctx;
}
-static inline void put_recursion_context(int *recursion, int rctx)
+static inline void put_recursion_context(u8 *recursion, int rctx)
{
barrier();
recursion[rctx]--;
--
2.45.2
On Mon, Jun 24, 2024 at 05:15:16PM +0200, Sebastian Andrzej Siewior wrote:
> There are four recursion counter, one for each context. The type of the
> counter is `int' but the counter is used as `bool' since it is only
> incremented if zero.
>
> Reduce the type of the recursion counter to an unsigned char, keep the
> increment/ decrement operation.
Does this actually matter? Aren't u8 memops encoded by longer
instructions etc..
On 2024-07-01 14:31:37 [+0200], Peter Zijlstra wrote:
> On Mon, Jun 24, 2024 at 05:15:16PM +0200, Sebastian Andrzej Siewior wrote:
> > There are four recursion counter, one for each context. The type of the
> > counter is `int' but the counter is used as `bool' since it is only
> > incremented if zero.
> >
> > Reduce the type of the recursion counter to an unsigned char, keep the
> > increment/ decrement operation.
>
> Does this actually matter? Aren't u8 memops encoded by longer
> instructions etc..
The goal here isn't to reduce the opcodes but to add it to task_struct
without making it larger by filling a hole.
But since you made me look at assembly:
old:
316b: 65 48 8b 15 00 00 00 mov %gs:0x0(%rip),%rdx # 3173 <perf_swevent_get_recursion_context+0x33>
3173: 1c ff sbb $0xff,%al
3175: 48 0f be c8 movsbq %al,%rcx
3179: 48 8d 94 8a 00 00 00 lea 0x0(%rdx,%rcx,4),%rdx
3180: 00
317d: R_X86_64_32S .data..percpu+0x4c
3181: 8b 0a mov (%rdx),%ecx
3183: 85 c9 test %ecx,%ecx
3185: 75 0e jne 3195 <perf_swevent_get_recursion_context+0x55>
3187: c7 02 01 00 00 00 movl $0x1,(%rdx)
^^^
318d: 0f be c0 movsbl %al,%eax
new:
2ff8: 1c ff sbb $0xff,%al
2ffa: 81 e2 00 01 ff 00 and $0xff0100,%edx
3000: 83 fa 01 cmp $0x1,%edx
3003: 1c ff sbb $0xff,%al
3005: 48 0f be d0 movsbq %al,%rdx
3009: 48 8d 94 11 00 00 00 lea 0x0(%rcx,%rdx,1),%rdx
3010: 00
300d: R_X86_64_32S .data..percpu+0x4c
3011: 80 3a 00 cmpb $0x0,(%rdx)
3014: 75 0b jne 3021 <perf_swevent_get_recursion_context+0x51>
3016: c6 02 01 movb $0x1,(%rdx)
^^^
3019: 0f be c0 movsbl %al,%eax
301c: e9 00 00 00 00 jmp 3021 <perf_swevent_get_recursion_context+0x51>
So we do even save a few bytes. We could avoid the "movsbl" at 3019 by
making the return type `unsigned char' ;)
Sebastian
On Mon, Jul 01, 2024 at 02:56:43PM +0200, Sebastian Andrzej Siewior wrote:
> On 2024-07-01 14:31:37 [+0200], Peter Zijlstra wrote:
> > On Mon, Jun 24, 2024 at 05:15:16PM +0200, Sebastian Andrzej Siewior wrote:
> > > There are four recursion counter, one for each context. The type of the
> > > counter is `int' but the counter is used as `bool' since it is only
> > > incremented if zero.
> > >
> > > Reduce the type of the recursion counter to an unsigned char, keep the
> > > increment/ decrement operation.
> >
> > Does this actually matter? Aren't u8 memops encoded by longer
> > instructions etc..
>
> The goal here isn't to reduce the opcodes but to add it to task_struct
> without making it larger by filling a hole.
Changelog failed to mention this crucial fact.
© 2016 - 2025 Red Hat, Inc.