[PATCH 3/3] x86/alternative: Patch a single alternative location only once

Juergen Gross posted 3 patches 2 days, 11 hours ago
[PATCH 3/3] x86/alternative: Patch a single alternative location only once
Posted by Juergen Gross 2 days, 11 hours ago
Instead of patching a single location potentially multiple times in
case of nested ALTERNATIVE()s, do the patching only after having
evaluated all alt_instr instances for that location.

This has multiple advantages:

- When replacing an indirect call with a direct call using the
  ALT_FLAG_DIRECT_CALL flag, that instance no longer needs to come
  before all other instances at the same location (the original
  instruction is needed to find the target of the direct call).

- With nested ALTERNATIVE()s, no intermediate replacement is ever
  visible. This avoids problems if, e.g., an interrupt occurs between
  the patching of the individual instances and the patched location is
  used while handling that interrupt.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- new patch
---
 arch/x86/kernel/alternative.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 735cc017f2d3..ccf07131cd47 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -648,6 +648,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 	u8 insn_buff[MAX_PATCH_LEN];
 	u8 *instr;
 	struct alt_instr *a, *b;
+	unsigned int instances = 0;
+	bool patched = false;
 
 	DPRINTK(ALT, "alt table %px, -> %px", start, end);
 
@@ -677,9 +679,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 		 * padding for all alt_instr entries for this site (nested
 		 * alternatives result in consecutive entries).
 		 */
-		for (b = a+1; b < end && instr_va(b) == instr_va(a); b++) {
-			u8 len = max(a->instrlen, b->instrlen);
-			a->instrlen = b->instrlen = len;
+		if (!instances) {
+			for (b = a+1; b < end && instr_va(b) == instr_va(a); b++) {
+				u8 len = max(a->instrlen, b->instrlen);
+				a->instrlen = b->instrlen = len;
+			}
+			instances = b - a;
+			patched = false;
 		}
 
 		instr = instr_va(a);
@@ -692,14 +698,19 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 		 * - feature not present but ALT_FLAG_NOT is set to mean,
 		 *   patch if feature is *NOT* present.
 		 */
-		if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
-			memcpy(insn_buff, instr, a->instrlen);
-			optimize_nops(instr, insn_buff, a->instrlen);
-		} else {
+		if (!boot_cpu_has(a->cpuid) != !(a->flags & ALT_FLAG_NOT)) {
 			apply_one_alternative(instr, insn_buff, a);
+			patched = true;
 		}
 
-		text_poke_early(instr, insn_buff, a->instrlen);
+		instances--;
+		if (!instances) {
+			if (!patched) {
+				memcpy(insn_buff, instr, a->instrlen);
+				optimize_nops(instr, insn_buff, a->instrlen);
+			}
+			text_poke_early(instr, insn_buff, a->instrlen);
+		}
 	}
 
 	kasan_enable_current();
-- 
2.51.0
Re: [PATCH 3/3] x86/alternative: Patch a single alternative location only once
Posted by Peter Zijlstra 1 day, 15 hours ago
On Mon, Sep 29, 2025 at 01:29:47PM +0200, Juergen Gross wrote:
> Instead of patching a single location potentially multiple times in
> case of nested ALTERNATIVE()s, do the patching only after having
> evaluated all alt_instr instances for that location.
> 
> This has multiple advantages:
> 
> - In case of replacing an indirect with a direct call using the
>   ALT_FLAG_DIRECT_CALL flag, there is no longer the need to have that
>   instance before any other instances at the same location (the
>   original instruction is needed for finding the target of the direct
>   call).
> 
> - In case of nested ALTERNATIVE()s there is no intermediate replacement
>   visible. This avoids any problems in case e.g. an interrupt is
>   happening between the single instances and the patched location is
>   used during handling the interrupt.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> V2:
> - new patch
> ---
>  arch/x86/kernel/alternative.c | 27 +++++++++++++++++++--------
>  1 file changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 735cc017f2d3..ccf07131cd47 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -648,6 +648,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
>  	u8 insn_buff[MAX_PATCH_LEN];
>  	u8 *instr;
>  	struct alt_instr *a, *b;
> +	unsigned int instances = 0;
> +	bool patched = false;
>  
>  	DPRINTK(ALT, "alt table %px, -> %px", start, end);
>  
> @@ -677,9 +679,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
>  		 * padding for all alt_instr entries for this site (nested
>  		 * alternatives result in consecutive entries).
>  		 */
> -		for (b = a+1; b < end && instr_va(b) == instr_va(a); b++) {
> -			u8 len = max(a->instrlen, b->instrlen);
> -			a->instrlen = b->instrlen = len;
> +		if (!instances) {
> +			for (b = a+1; b < end && instr_va(b) == instr_va(a); b++) {
> +				u8 len = max(a->instrlen, b->instrlen);
> +				a->instrlen = b->instrlen = len;
> +			}
> +			instances = b - a;
> +			patched = false;
>  		}
>  
>  		instr = instr_va(a);
> @@ -692,14 +698,19 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
>  		 * - feature not present but ALT_FLAG_NOT is set to mean,
>  		 *   patch if feature is *NOT* present.
>  		 */
> -		if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
> -			memcpy(insn_buff, instr, a->instrlen);
> -			optimize_nops(instr, insn_buff, a->instrlen);
> -		} else {
> +		if (!boot_cpu_has(a->cpuid) != !(a->flags & ALT_FLAG_NOT)) {
>  			apply_one_alternative(instr, insn_buff, a);
> +			patched = true;
>  		}
>  
> -		text_poke_early(instr, insn_buff, a->instrlen);
> +		instances--;
> +		if (!instances) {
> +			if (!patched) {
> +				memcpy(insn_buff, instr, a->instrlen);
> +				optimize_nops(instr, insn_buff, a->instrlen);
> +			}
> +			text_poke_early(instr, insn_buff, a->instrlen);
> +		}
>  	}
>  
>  	kasan_enable_current();

I think you lost the optimize_nops() call for the patched case.

That is, note how apply_one_alternative() does 0x90 padding, but then
you only do optimize_nops() when !patched.
Re: [PATCH 3/3] x86/alternative: Patch a single alternative location only once
Posted by Jürgen Groß 1 day, 15 hours ago
On 30.09.25 09:26, Peter Zijlstra wrote:
> On Mon, Sep 29, 2025 at 01:29:47PM +0200, Juergen Gross wrote:
>> Instead of patching a single location potentially multiple times in
>> case of nested ALTERNATIVE()s, do the patching only after having
>> evaluated all alt_instr instances for that location.
>>
>> This has multiple advantages:
>>
>> - In case of replacing an indirect with a direct call using the
>>    ALT_FLAG_DIRECT_CALL flag, there is no longer the need to have that
>>    instance before any other instances at the same location (the
>>    original instruction is needed for finding the target of the direct
>>    call).
>>
>> - In case of nested ALTERNATIVE()s there is no intermediate replacement
>>    visible. This avoids any problems in case e.g. an interrupt is
>>    happening between the single instances and the patched location is
>>    used during handling the interrupt.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>> V2:
>> - new patch
>> ---
>>   arch/x86/kernel/alternative.c | 27 +++++++++++++++++++--------
>>   1 file changed, 19 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
>> index 735cc017f2d3..ccf07131cd47 100644
>> --- a/arch/x86/kernel/alternative.c
>> +++ b/arch/x86/kernel/alternative.c
>> @@ -648,6 +648,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
>>   	u8 insn_buff[MAX_PATCH_LEN];
>>   	u8 *instr;
>>   	struct alt_instr *a, *b;
>> +	unsigned int instances = 0;
>> +	bool patched = false;
>>   
>>   	DPRINTK(ALT, "alt table %px, -> %px", start, end);
>>   
>> @@ -677,9 +679,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
>>   		 * padding for all alt_instr entries for this site (nested
>>   		 * alternatives result in consecutive entries).
>>   		 */
>> -		for (b = a+1; b < end && instr_va(b) == instr_va(a); b++) {
>> -			u8 len = max(a->instrlen, b->instrlen);
>> -			a->instrlen = b->instrlen = len;
>> +		if (!instances) {
>> +			for (b = a+1; b < end && instr_va(b) == instr_va(a); b++) {
>> +				u8 len = max(a->instrlen, b->instrlen);
>> +				a->instrlen = b->instrlen = len;
>> +			}
>> +			instances = b - a;
>> +			patched = false;
>>   		}
>>   
>>   		instr = instr_va(a);
>> @@ -692,14 +698,19 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
>>   		 * - feature not present but ALT_FLAG_NOT is set to mean,
>>   		 *   patch if feature is *NOT* present.
>>   		 */
>> -		if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
>> -			memcpy(insn_buff, instr, a->instrlen);
>> -			optimize_nops(instr, insn_buff, a->instrlen);
>> -		} else {
>> +		if (!boot_cpu_has(a->cpuid) != !(a->flags & ALT_FLAG_NOT)) {
>>   			apply_one_alternative(instr, insn_buff, a);
>> +			patched = true;
>>   		}
>>   
>> -		text_poke_early(instr, insn_buff, a->instrlen);
>> +		instances--;
>> +		if (!instances) {
>> +			if (!patched) {
>> +				memcpy(insn_buff, instr, a->instrlen);
>> +				optimize_nops(instr, insn_buff, a->instrlen);
>> +			}
>> +			text_poke_early(instr, insn_buff, a->instrlen);
>> +		}
>>   	}
>>   
>>   	kasan_enable_current();
> 
> I think you lost the optimize_nops() call for the patched case.
> 
> That is, note how apply_one_alternative() does 0x90 padding, but then
> you only do optimize_nops() when !patched.

In the patched case the optimize_nops() call is part of
text_poke_apply_relocation(), the same as without my series.


Juergen
Re: [PATCH 3/3] x86/alternative: Patch a single alternative location only once
Posted by Peter Zijlstra 1 day, 14 hours ago
On Tue, Sep 30, 2025 at 09:33:25AM +0200, Jürgen Groß wrote:
> On 30.09.25 09:26, Peter Zijlstra wrote:

> > > @@ -692,14 +698,19 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
> > >   		 * - feature not present but ALT_FLAG_NOT is set to mean,
> > >   		 *   patch if feature is *NOT* present.
> > >   		 */
> > > -		if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
> > > -			memcpy(insn_buff, instr, a->instrlen);
> > > -			optimize_nops(instr, insn_buff, a->instrlen);
> > > -		} else {
> > > +		if (!boot_cpu_has(a->cpuid) != !(a->flags & ALT_FLAG_NOT)) {
> > >   			apply_one_alternative(instr, insn_buff, a);
> > > +			patched = true;
> > >   		}
> > > -		text_poke_early(instr, insn_buff, a->instrlen);
> > > +		instances--;
> > > +		if (!instances) {
> > > +			if (!patched) {
> > > +				memcpy(insn_buff, instr, a->instrlen);
> > > +				optimize_nops(instr, insn_buff, a->instrlen);
> > > +			}
> > > +			text_poke_early(instr, insn_buff, a->instrlen);
> > > +		}
> > >   	}
> > >   	kasan_enable_current();
> > 
> > I think you lost the optimize_nops() call for the patched case.
> > 
> > That is, note how apply_one_alternative() does 0x90 padding, but then
> > you only do optimize_nops() when !patched.
> 
> The call of optimize_nops() is part of text_poke_apply_relocation() when
> patching, like without my series.

Indeed it is. Clearly I need more wakeup juice :-)

Therefore the patches seem fine. I'll try and hold onto them until the
merge window closes and then stick them in x86/core or thereabout.
Re: [PATCH 3/3] x86/alternative: Patch a single alternative location only once
Posted by Jürgen Groß 1 day, 14 hours ago
On 30.09.25 09:39, Peter Zijlstra wrote:
> On Tue, Sep 30, 2025 at 09:33:25AM +0200, Jürgen Groß wrote:
>> On 30.09.25 09:26, Peter Zijlstra wrote:
> 
>>>> @@ -692,14 +698,19 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
>>>>    		 * - feature not present but ALT_FLAG_NOT is set to mean,
>>>>    		 *   patch if feature is *NOT* present.
>>>>    		 */
>>>> -		if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
>>>> -			memcpy(insn_buff, instr, a->instrlen);
>>>> -			optimize_nops(instr, insn_buff, a->instrlen);
>>>> -		} else {
>>>> +		if (!boot_cpu_has(a->cpuid) != !(a->flags & ALT_FLAG_NOT)) {
>>>>    			apply_one_alternative(instr, insn_buff, a);
>>>> +			patched = true;
>>>>    		}
>>>> -		text_poke_early(instr, insn_buff, a->instrlen);
>>>> +		instances--;
>>>> +		if (!instances) {
>>>> +			if (!patched) {
>>>> +				memcpy(insn_buff, instr, a->instrlen);
>>>> +				optimize_nops(instr, insn_buff, a->instrlen);
>>>> +			}
>>>> +			text_poke_early(instr, insn_buff, a->instrlen);
>>>> +		}
>>>>    	}
>>>>    	kasan_enable_current();
>>>
>>> I think you lost the optimize_nops() call for the patched case.
>>>
>>> That is, note how apply_one_alternative() does 0x90 padding, but then
>>> you only do optimize_nops() when !patched.
>>
>> The call of optimize_nops() is part of text_poke_apply_relocation() when
>> patching, like without my series.
> 
> Indeed it is. Clearly I need more wakeup juice :-)
> 
> Therefore the patches seem fine. I'll try and hold onto them until the
> merge window closes and then stick them in x86/core or thereabout.

Thanks.


Juergen