In most cases, ti_work values passed to arch_exit_to_user_mode_prepare()
are zeros, e.g., 99% in kernel build tests. So an obvious optimization
is to test ti_work for zero before processing individual bits in it.
In addition, Intel 0day tests find no perf regression with this change.
Suggested-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
arch/x86/include/asm/entry-common.h | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index fb2809b20b0a..4c78b99060b5 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -47,15 +47,17 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 						  unsigned long ti_work)
 {
-	if (ti_work & _TIF_USER_RETURN_NOTIFY)
-		fire_user_return_notifiers();
+	if (unlikely(ti_work)) {
+		if (ti_work & _TIF_USER_RETURN_NOTIFY)
+			fire_user_return_notifiers();
 
-	if (unlikely(ti_work & _TIF_IO_BITMAP))
-		tss_update_io_bitmap();
+		if (unlikely(ti_work & _TIF_IO_BITMAP))
+			tss_update_io_bitmap();
 
-	fpregs_assert_state_consistent();
-	if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
-		switch_fpu_return();
+		fpregs_assert_state_consistent();
+		if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
+			switch_fpu_return();
+	}
 
 #ifdef CONFIG_COMPAT
 	/*
--
2.45.2
On Tue, Aug 06 2024 at 22:47, Xin Li wrote:
> In most cases, ti_work values passed to arch_exit_to_user_mode_prepare()
> are zeros, e.g., 99% in kernel build tests. So an obvious optimization
> is to test ti_work for zero before processing individual bits in it.
>
> In addition, Intel 0day tests find no perf regression with this change.
>
> Suggested-by: H. Peter Anvin (Intel) <hpa@zytor.com>
> Signed-off-by: Xin Li (Intel) <xin@zytor.com>
> ---
> arch/x86/include/asm/entry-common.h | 16 +++++++++-------
> 1 file changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
> index fb2809b20b0a..4c78b99060b5 100644
> --- a/arch/x86/include/asm/entry-common.h
> +++ b/arch/x86/include/asm/entry-common.h
> @@ -47,15 +47,17 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
>  static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
>  						  unsigned long ti_work)
>  {
> -	if (ti_work & _TIF_USER_RETURN_NOTIFY)
> -		fire_user_return_notifiers();
> +	if (unlikely(ti_work)) {
> +		if (ti_work & _TIF_USER_RETURN_NOTIFY)
> +			fire_user_return_notifiers();
>
> -	if (unlikely(ti_work & _TIF_IO_BITMAP))
> -		tss_update_io_bitmap();
> +		if (unlikely(ti_work & _TIF_IO_BITMAP))
> +			tss_update_io_bitmap();
>
> -	fpregs_assert_state_consistent();
Please keep this unconditional and independent of ti_work. It's a debug
feature and you kill coverage with making it conditional on ti_work.
Thanks,
tglx
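
For context, fpregs_assert_state_consistent() is a pure debug assertion that only does real work when CONFIG_X86_DEBUG_FPU=y. Below is a simplified sketch of its body, modeled on arch/x86/kernel/fpu/core.c of that era; the exact details may differ between kernel versions:

#ifdef CONFIG_X86_DEBUG_FPU
/*
 * Sketch: warn if the in-memory FPU state and the live registers
 * disagree on the way out to user space.
 */
void fpregs_assert_state_consistent(void)
{
	struct fpu *fpu = &current->thread.fpu;

	/* A register reload is pending anyway; nothing to check. */
	if (test_thread_flag(TIF_NEED_FPU_LOAD))
		return;

	WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
}
#endif

Making this call conditional on ti_work would mean the check never runs on the ~99% of exits where ti_work is zero, which is exactly the coverage loss pointed out above.
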
On 8/7/2024 11:08 AM, Thomas Gleixner wrote:
>> -	fpregs_assert_state_consistent();
> Please keep this unconditional and independent of ti_work. It's a debug
> feature and you kill coverage with making it conditional on ti_work.
Sigh, I'm an idiot.
Thanks!
Xin
On Wed, Aug 07 2024 at 20:08, Thomas Gleixner wrote:
> On Tue, Aug 06 2024 at 22:47, Xin Li wrote:
>> In most cases, ti_work values passed to arch_exit_to_user_mode_prepare()
>> are zeros, e.g., 99% in kernel build tests. So an obvious optimization
>> is to test ti_work for zero before processing individual bits in it.
>>
>> In addition, Intel 0day tests find no perf regression with this change.
>>
>> Suggested-by: H. Peter Anvin (Intel) <hpa@zytor.com>
>> Signed-off-by: Xin Li (Intel) <xin@zytor.com>
>> ---
>> arch/x86/include/asm/entry-common.h | 16 +++++++++-------
>> 1 file changed, 9 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
>> index fb2809b20b0a..4c78b99060b5 100644
>> --- a/arch/x86/include/asm/entry-common.h
>> +++ b/arch/x86/include/asm/entry-common.h
>> @@ -47,15 +47,17 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
>>  static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
>>  						  unsigned long ti_work)
>>  {
>> -	if (ti_work & _TIF_USER_RETURN_NOTIFY)
>> -		fire_user_return_notifiers();
>> +	if (unlikely(ti_work)) {
>> +		if (ti_work & _TIF_USER_RETURN_NOTIFY)
>> +			fire_user_return_notifiers();
>>
>> -	if (unlikely(ti_work & _TIF_IO_BITMAP))
>> -		tss_update_io_bitmap();
>> +		if (unlikely(ti_work & _TIF_IO_BITMAP))
>> +			tss_update_io_bitmap();
>>
>> -	fpregs_assert_state_consistent();
>
> Please keep this unconditional and independent of ti_work. It's a debug
> feature and you kill coverage with making it conditional on ti_work.
Also spare the extra indentation level and do:
static inline void arch_exit_work(unsigned long ti_work)
{
	if (ti_work & _TIF_USER_RETURN_NOTIFY)
		fire_user_return_notifiers();

	if (unlikely(ti_work & _TIF_IO_BITMAP))
		tss_update_io_bitmap();

	fpregs_assert_state_consistent();

	if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
		switch_fpu_return();
}

static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
						  unsigned long ti_work)
{
	if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work))
		arch_exit_work(ti_work);

	...
Thanks,
tglx
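
The IS_ENABLED(CONFIG_X86_DEBUG_FPU) term in that suggestion relies on the assertion being compiled out entirely when the option is off. Roughly, the declaration in arch/x86/include/asm/fpu/api.h looks like this (a sketch; the exact form may vary by kernel version):

#ifdef CONFIG_X86_DEBUG_FPU
/* Real check: must keep running on every exit to user mode. */
extern void fpregs_assert_state_consistent(void);
#else
/* Stub: compiles to nothing on production configs. */
static inline void fpregs_assert_state_consistent(void) { }
#endif

So with CONFIG_X86_DEBUG_FPU=n the guard folds to unlikely(ti_work) and the common zero-work exit skips arch_exit_work() entirely, while debug kernels always call it and keep the assertion coverage requested above.
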
On Wed, Aug 7, 2024 at 1:51 AM Xin Li (Intel) <xin@zytor.com> wrote:
>
> In most cases, ti_work values passed to arch_exit_to_user_mode_prepare()
> are zeros, e.g., 99% in kernel build tests. So an obvious optimization
> is to test ti_work for zero before processing individual bits in it.
>
> In addition, Intel 0day tests find no perf regression with this change.
>
> Suggested-by: H. Peter Anvin (Intel) <hpa@zytor.com>
> Signed-off-by: Xin Li (Intel) <xin@zytor.com>
> ---
> arch/x86/include/asm/entry-common.h | 16 +++++++++-------
> 1 file changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
> index fb2809b20b0a..4c78b99060b5 100644
> --- a/arch/x86/include/asm/entry-common.h
> +++ b/arch/x86/include/asm/entry-common.h
> @@ -47,15 +47,17 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
>  static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
>  						  unsigned long ti_work)
>  {
> -	if (ti_work & _TIF_USER_RETURN_NOTIFY)
> -		fire_user_return_notifiers();
> +	if (unlikely(ti_work)) {
> +		if (ti_work & _TIF_USER_RETURN_NOTIFY)
> +			fire_user_return_notifiers();
>
> -	if (unlikely(ti_work & _TIF_IO_BITMAP))
> -		tss_update_io_bitmap();
> +		if (unlikely(ti_work & _TIF_IO_BITMAP))
> +			tss_update_io_bitmap();
>
> -	fpregs_assert_state_consistent();
> -	if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
> -		switch_fpu_return();
> +		fpregs_assert_state_consistent();
This call was originally unconditional, and does nothing if
TIF_NEED_FPU_LOAD is set.
> +		if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
> +			switch_fpu_return();
> +	}
>
>  #ifdef CONFIG_COMPAT
>  	/*
> --
> 2.45.2
>
>
Brian Gerst
On 8/7/2024 9:21 AM, Brian Gerst wrote:
>> +		fpregs_assert_state_consistent();
> This call was originally unconditional, and does nothing if
> TIF_NEED_FPU_LOAD is set.

Lost my mind!

Thanks!