x86 is the only architecture wanting an optimisation here, but the
test_and_set_bit() is a store into the monitored line and is racy with
determining whether an IPI can be skipped.
Instead, implement a new arch helper with different semantics; to make the
softirq pending and decide about IPIs together. For now, implement the
default helper. It will be overridden by x86 in a subsequent change.
No functional change.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Anthony PERARD <anthony.perard@vates.tech>
CC: Michal Orzel <michal.orzel@amd.com>
CC: Julien Grall <julien@xen.org>
CC: Stefano Stabellini <sstabellini@kernel.org>
---
xen/arch/x86/acpi/cpu_idle.c | 5 -----
xen/arch/x86/include/asm/softirq.h | 2 --
xen/common/softirq.c | 8 ++------
xen/include/xen/softirq.h | 16 ++++++++++++++++
4 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index dbcb80d81db9..caa0fef0b3b1 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -436,11 +436,6 @@ static int __init cf_check cpu_idle_key_init(void)
}
__initcall(cpu_idle_key_init);
-bool arch_skip_send_event_check(unsigned int cpu)
-{
- return false;
-}
-
void mwait_idle_with_hints(unsigned int eax, unsigned int ecx)
{
unsigned int cpu = smp_processor_id();
diff --git a/xen/arch/x86/include/asm/softirq.h b/xen/arch/x86/include/asm/softirq.h
index 415ee866c79d..e4b194f069fb 100644
--- a/xen/arch/x86/include/asm/softirq.h
+++ b/xen/arch/x86/include/asm/softirq.h
@@ -9,6 +9,4 @@
#define HVM_DPCI_SOFTIRQ (NR_COMMON_SOFTIRQS + 4)
#define NR_ARCH_SOFTIRQS 5
-bool arch_skip_send_event_check(unsigned int cpu);
-
#endif /* __ASM_SOFTIRQ_H__ */
diff --git a/xen/common/softirq.c b/xen/common/softirq.c
index 60f344e8425e..0368011f95d1 100644
--- a/xen/common/softirq.c
+++ b/xen/common/softirq.c
@@ -94,9 +94,7 @@ void cpumask_raise_softirq(const cpumask_t *mask, unsigned int nr)
raise_mask = &per_cpu(batch_mask, this_cpu);
for_each_cpu(cpu, mask)
- if ( !test_and_set_bit(nr, &softirq_pending(cpu)) &&
- cpu != this_cpu &&
- !arch_skip_send_event_check(cpu) )
+ if ( !arch_pend_softirq(nr, cpu) && cpu != this_cpu )
__cpumask_set_cpu(cpu, raise_mask);
if ( raise_mask == &send_mask )
@@ -107,9 +105,7 @@ void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
{
unsigned int this_cpu = smp_processor_id();
- if ( test_and_set_bit(nr, &softirq_pending(cpu))
- || (cpu == this_cpu)
- || arch_skip_send_event_check(cpu) )
+ if ( arch_pend_softirq(nr, cpu) || cpu == this_cpu )
return;
if ( !per_cpu(batching, this_cpu) || in_irq() )
diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
index e9f79ec0ce3e..0814c8e5cd41 100644
--- a/xen/include/xen/softirq.h
+++ b/xen/include/xen/softirq.h
@@ -23,6 +23,22 @@ enum {
#define NR_SOFTIRQS (NR_COMMON_SOFTIRQS + NR_ARCH_SOFTIRQS)
+/*
+ * Ensure softirq @nr is pending on @cpu. Return true if an IPI can be
+ * skipped, false if the IPI cannot be skipped.
+ */
+#ifndef arch_pend_softirq
+static always_inline bool arch_pend_softirq(unsigned int nr, unsigned int cpu)
+{
+ /*
+ * Try to set the softirq pending. If we set the bit (i.e. the old bit
+ * was 0), we're responsible to send the IPI. If the softirq was already
+ * pending (i.e. the old bit was 1), no IPI is needed.
+ */
+ return test_and_set_bit(nr, &softirq_pending(cpu));
+}
+#endif
+
typedef void (*softirq_handler)(void);
void do_softirq(void);
--
2.39.5
On Wed, Jul 02, 2025 at 03:41:18PM +0100, Andrew Cooper wrote:
> x86 is the only architecture wanting an optimisation here, but the
> test_and_set_bit() is a store into the monitored line and is racy with
> determining whether an IPI can be skipped.
>
> Instead, implement a new arch helper with different semantics; to make the
> softirq pending and decide about IPIs together. For now, implement the
> default helper. It will be overridden by x86 in a subsequent change.
>
> No functional change.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
With the adjusted commit message you proposed to Jan:
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Just one nit below to comment something.
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Anthony PERARD <anthony.perard@vates.tech>
> CC: Michal Orzel <michal.orzel@amd.com>
> CC: Julien Grall <julien@xen.org>
> CC: Stefano Stabellini <sstabellini@kernel.org>
> ---
> xen/arch/x86/acpi/cpu_idle.c | 5 -----
> xen/arch/x86/include/asm/softirq.h | 2 --
> xen/common/softirq.c | 8 ++------
> xen/include/xen/softirq.h | 16 ++++++++++++++++
> 4 files changed, 18 insertions(+), 13 deletions(-)
>
> diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
> index dbcb80d81db9..caa0fef0b3b1 100644
> --- a/xen/arch/x86/acpi/cpu_idle.c
> +++ b/xen/arch/x86/acpi/cpu_idle.c
> @@ -436,11 +436,6 @@ static int __init cf_check cpu_idle_key_init(void)
> }
> __initcall(cpu_idle_key_init);
>
> -bool arch_skip_send_event_check(unsigned int cpu)
> -{
> - return false;
> -}
> -
> void mwait_idle_with_hints(unsigned int eax, unsigned int ecx)
> {
> unsigned int cpu = smp_processor_id();
> diff --git a/xen/arch/x86/include/asm/softirq.h b/xen/arch/x86/include/asm/softirq.h
> index 415ee866c79d..e4b194f069fb 100644
> --- a/xen/arch/x86/include/asm/softirq.h
> +++ b/xen/arch/x86/include/asm/softirq.h
> @@ -9,6 +9,4 @@
> #define HVM_DPCI_SOFTIRQ (NR_COMMON_SOFTIRQS + 4)
> #define NR_ARCH_SOFTIRQS 5
>
> -bool arch_skip_send_event_check(unsigned int cpu);
> -
> #endif /* __ASM_SOFTIRQ_H__ */
> diff --git a/xen/common/softirq.c b/xen/common/softirq.c
> index 60f344e8425e..0368011f95d1 100644
> --- a/xen/common/softirq.c
> +++ b/xen/common/softirq.c
> @@ -94,9 +94,7 @@ void cpumask_raise_softirq(const cpumask_t *mask, unsigned int nr)
> raise_mask = &per_cpu(batch_mask, this_cpu);
>
> for_each_cpu(cpu, mask)
> - if ( !test_and_set_bit(nr, &softirq_pending(cpu)) &&
> - cpu != this_cpu &&
> - !arch_skip_send_event_check(cpu) )
> + if ( !arch_pend_softirq(nr, cpu) && cpu != this_cpu )
> __cpumask_set_cpu(cpu, raise_mask);
>
> if ( raise_mask == &send_mask )
> @@ -107,9 +105,7 @@ void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
> {
> unsigned int this_cpu = smp_processor_id();
>
> - if ( test_and_set_bit(nr, &softirq_pending(cpu))
> - || (cpu == this_cpu)
> - || arch_skip_send_event_check(cpu) )
> + if ( arch_pend_softirq(nr, cpu) || cpu == this_cpu )
> return;
>
> if ( !per_cpu(batching, this_cpu) || in_irq() )
> diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
> index e9f79ec0ce3e..0814c8e5cd41 100644
> --- a/xen/include/xen/softirq.h
> +++ b/xen/include/xen/softirq.h
> @@ -23,6 +23,22 @@ enum {
>
> #define NR_SOFTIRQS (NR_COMMON_SOFTIRQS + NR_ARCH_SOFTIRQS)
>
> +/*
> + * Ensure softirq @nr is pending on @cpu. Return true if an IPI can be
> + * skipped, false if the IPI cannot be skipped.
> + */
> +#ifndef arch_pend_softirq
> +static always_inline bool arch_pend_softirq(unsigned int nr, unsigned int cpu)
Nit: I would maybe it arch_set_softirq(), I find `pend` not that clear
(I would rather fully spell `pending` instead).
Thanks, Roger.
On 03.07.2025 18:21, Roger Pau Monné wrote:
> On Wed, Jul 02, 2025 at 03:41:18PM +0100, Andrew Cooper wrote:
>> --- a/xen/include/xen/softirq.h
>> +++ b/xen/include/xen/softirq.h
>> @@ -23,6 +23,22 @@ enum {
>>
>> #define NR_SOFTIRQS (NR_COMMON_SOFTIRQS + NR_ARCH_SOFTIRQS)
>>
>> +/*
>> + * Ensure softirq @nr is pending on @cpu. Return true if an IPI can be
>> + * skipped, false if the IPI cannot be skipped.
>> + */
>> +#ifndef arch_pend_softirq
>> +static always_inline bool arch_pend_softirq(unsigned int nr, unsigned int cpu)
>
> Nit: I would maybe it arch_set_softirq(), I find `pend` not that clear
> (I would rather fully spell `pending` instead).
I, too, did wonder about the naming here. But using "pending" as you suggest
has the effect of giving the function a name we would normally associate with
a predicate ("Is it pending?"), whereas here the function is used to _mark_ a
softirq as pending. Hence in the end I didn't comment at all; I'd be fine
with "set", but I'm also okay with "pend".
Jan
On Fri, Jul 04, 2025 at 09:23:29AM +0200, Jan Beulich wrote:
> On 03.07.2025 18:21, Roger Pau Monné wrote:
> > On Wed, Jul 02, 2025 at 03:41:18PM +0100, Andrew Cooper wrote:
> >> --- a/xen/include/xen/softirq.h
> >> +++ b/xen/include/xen/softirq.h
> >> @@ -23,6 +23,22 @@ enum {
> >>
> >> #define NR_SOFTIRQS (NR_COMMON_SOFTIRQS + NR_ARCH_SOFTIRQS)
> >>
> >> +/*
> >> + * Ensure softirq @nr is pending on @cpu. Return true if an IPI can be
> >> + * skipped, false if the IPI cannot be skipped.
> >> + */
> >> +#ifndef arch_pend_softirq
> >> +static always_inline bool arch_pend_softirq(unsigned int nr, unsigned int cpu)
> >
> > Nit: I would maybe it arch_set_softirq(), I find `pend` not that clear
> > (I would rather fully spell `pending` instead).
>
> I, too, did wonder about the naming here. But using "pending" as you suggest
> has the effect of giving the function a name we would normally associate with
> a predicate ("Is it pending?"), whereas here the function is used to _mark_ a
> softirq as pending. Hence in the end I didn't comment at all; I'd be fine
> with "set", but I'm also okay with "pend".
It's a set and check kind of function, so I don't care much. Just
found the pend a bit too short, I don't think we usually abbreviate
pending to pend. In fact we use pend in more than one variable name
to store the end of a physical memory region.
Thanks, Roger.
On 04.07.2025 09:55, Roger Pau Monné wrote:
> On Fri, Jul 04, 2025 at 09:23:29AM +0200, Jan Beulich wrote:
>> On 03.07.2025 18:21, Roger Pau Monné wrote:
>>> On Wed, Jul 02, 2025 at 03:41:18PM +0100, Andrew Cooper wrote:
>>>> --- a/xen/include/xen/softirq.h
>>>> +++ b/xen/include/xen/softirq.h
>>>> @@ -23,6 +23,22 @@ enum {
>>>>
>>>> #define NR_SOFTIRQS (NR_COMMON_SOFTIRQS + NR_ARCH_SOFTIRQS)
>>>>
>>>> +/*
>>>> + * Ensure softirq @nr is pending on @cpu. Return true if an IPI can be
>>>> + * skipped, false if the IPI cannot be skipped.
>>>> + */
>>>> +#ifndef arch_pend_softirq
>>>> +static always_inline bool arch_pend_softirq(unsigned int nr, unsigned int cpu)
>>>
>>> Nit: I would maybe it arch_set_softirq(), I find `pend` not that clear
>>> (I would rather fully spell `pending` instead).
>>
>> I, too, did wonder about the naming here. But using "pending" as you suggest
>> has the effect of giving the function a name we would normally associate with
>> a predicate ("Is it pending?"), whereas here the function is used to _mark_ a
>> softirq as pending. Hence in the end I didn't comment at all; I'd be fine
>> with "set", but I'm also okay with "pend".
>
> It's a set and check kind of function, so I don't care much. Just
> found the pend a bit too short, I don't think we usually abbreviate
> pending to pend.
Aiui it's not an abbreviation, but kind of a verb (inverse-)derived from pending.
I don't know whether that's "official"; my dictionary doesn't have it.
Jan
On 04/07/2025 9:25 am, Jan Beulich wrote:
> On 04.07.2025 09:55, Roger Pau Monné wrote:
>> On Fri, Jul 04, 2025 at 09:23:29AM +0200, Jan Beulich wrote:
>>> On 03.07.2025 18:21, Roger Pau Monné wrote:
>>>> On Wed, Jul 02, 2025 at 03:41:18PM +0100, Andrew Cooper wrote:
>>>>> --- a/xen/include/xen/softirq.h
>>>>> +++ b/xen/include/xen/softirq.h
>>>>> @@ -23,6 +23,22 @@ enum {
>>>>>
>>>>> #define NR_SOFTIRQS (NR_COMMON_SOFTIRQS + NR_ARCH_SOFTIRQS)
>>>>>
>>>>> +/*
>>>>> + * Ensure softirq @nr is pending on @cpu. Return true if an IPI can be
>>>>> + * skipped, false if the IPI cannot be skipped.
>>>>> + */
>>>>> +#ifndef arch_pend_softirq
>>>>> +static always_inline bool arch_pend_softirq(unsigned int nr, unsigned int cpu)
>>>> Nit: I would maybe it arch_set_softirq(), I find `pend` not that clear
>>>> (I would rather fully spell `pending` instead).
>>> I, too, did wonder about the naming here. But using "pending" as you suggest
>>> has the effect of giving the function a name we would normally associate with
>>> a predicate ("Is it pending?"), whereas here the function is used to _mark_ a
>>> softirq as pending. Hence in the end I didn't comment at all; I'd be fine
>>> with "set", but I'm also okay with "pend".
>> It's a set and check kind of function, so I don't care much. Just
>> found the pend a bit too short, I don't think we usually abbreviate
>> pending to pend.
> Aiui it's not an abbreviation, but kind of a verb (inverse-)derived from pending.
> I don't know whether that's "official"; my dictionary doesn't have it.
It's used frequently in some circles, meaning "to make pending".
But I've changed the name because I don't care enough to argue.
~Andrew
On 02.07.2025 16:41, Andrew Cooper wrote: > x86 is the only architecture wanting an optimisation here, but the > test_and_set_bit() is a store into the monitored line Which is intentional aiui, while this reads as if this was part of the issue. > and is racy with determining whether an IPI can be skipped. Racy here as in limiting the effect of the optimization, but not affecting correctness aiui: If the woken CPU managed to clear the bit already, we'd needlessly IPI it. This could also do with saying. Jan
On 03/07/2025 9:11 am, Jan Beulich wrote: > On 02.07.2025 16:41, Andrew Cooper wrote: >> x86 is the only architecture wanting an optimisation here, but the >> test_and_set_bit() is a store into the monitored line > Which is intentional aiui, while this reads as if this was part of the issue. I don't understand what you're trying to say. It is racy, and that's why we're changing it. >> and is racy with determining whether an IPI can be skipped. > Racy here as in limiting the effect of the optimization, but not affecting > correctness aiui: If the woken CPU managed to clear the bit already, we'd > needlessly IPI it. Correct. > This could also do with saying. What about this? x86 is the only architecture wanting an optimisation here, but the test_and_set_bit() is a store into the monitored line (i.e. will wake up the target) and, prior to the removal of the broken IPI-elision algorithm, was racy, causing unnecessary IPIs to be sent. To do this in a race-free way, the store to the monited line needs to also sample the status of the target in one atomic action. Implement a new arch helper with different semantics; to make the softirq pending and decide about IPIs together. For now, implement the default helper. It will be overridden by x86 in a subsequent change. ~Andrew
On 03.07.2025 12:36, Andrew Cooper wrote: > On 03/07/2025 9:11 am, Jan Beulich wrote: >> On 02.07.2025 16:41, Andrew Cooper wrote: >>> x86 is the only architecture wanting an optimisation here, but the >>> test_and_set_bit() is a store into the monitored line >> Which is intentional aiui, while this reads as if this was part of the issue. > > I don't understand what you're trying to say. What I was trying to say is the way you wrote it to me it read as if you're describing two issues: There wrongly being a store into the monitored line, and ... > It is racy, and that's why we're changing it. > >>> and is racy with determining whether an IPI can be skipped. ... there being a race. >> Racy here as in limiting the effect of the optimization, but not affecting >> correctness aiui: If the woken CPU managed to clear the bit already, we'd >> needlessly IPI it. > > Correct. > >> This could also do with saying. > > What about this? > > x86 is the only architecture wanting an optimisation here, but the > test_and_set_bit() is a store into the monitored line (i.e. will wake up > the target) and, prior to the removal of the broken IPI-elision > algorithm, was racy, causing unnecessary IPIs to be sent. > > To do this in a race-free way, the store to the monited line needs to > also sample the status of the target in one atomic action. Implement a > new arch helper with different semantics; to make the softirq pending > and decide about IPIs together. For now, implement the default helper. > It will be overridden by x86 in a subsequent change. Better, yes. Thanks. Jan
© 2016 - 2025 Red Hat, Inc.