From: Rik van Riel <riel@fb.com>
RAR TLB flushing is started by sending a command to the APIC.
This patch adds Remote Action Request commands.
[riel: move some things around to account for 6 years of changes]
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/apicdef.h | 1 +
arch/x86/include/asm/irq_vectors.h | 5 +++++
arch/x86/include/asm/smp.h | 15 +++++++++++++++
arch/x86/kernel/apic/ipi.c | 23 +++++++++++++++++++----
arch/x86/kernel/apic/local.h | 3 +++
arch/x86/kernel/smp.c | 3 +++
6 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 094106b6a538..b152d45af91a 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -92,6 +92,7 @@
#define APIC_DM_LOWEST 0x00100
#define APIC_DM_SMI 0x00200
#define APIC_DM_REMRD 0x00300
+#define APIC_DM_RAR 0x00300
#define APIC_DM_NMI 0x00400
#define APIC_DM_INIT 0x00500
#define APIC_DM_STARTUP 0x00600
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 47051871b436..c417b0015304 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -103,6 +103,11 @@
*/
#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb
+/*
+ * RAR (remote action request) TLB flush
+ */
+#define RAR_VECTOR 0xe0
+
#define NR_VECTORS 256
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0c1c68039d6f..1ab9f5fcac8a 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -40,6 +40,9 @@ struct smp_ops {
void (*send_call_func_ipi)(const struct cpumask *mask);
void (*send_call_func_single_ipi)(int cpu);
+
+ void (*send_rar_ipi)(const struct cpumask *mask);
+ void (*send_rar_single_ipi)(int cpu);
};
/* Globals due to paravirt */
@@ -100,6 +103,16 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
smp_ops.send_call_func_ipi(mask);
}
+static inline void arch_send_rar_single_ipi(int cpu)
+{
+ smp_ops.send_rar_single_ipi(cpu);
+}
+
+static inline void arch_send_rar_ipi_mask(const struct cpumask *mask)
+{
+ smp_ops.send_rar_ipi(mask);
+}
+
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void smp_prepare_cpus_common(void);
@@ -120,6 +133,8 @@ void __noreturn mwait_play_dead(unsigned int eax_hint);
void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
+void native_send_rar_ipi(const struct cpumask *mask);
+void native_send_rar_single_ipi(int cpu);
asmlinkage __visible void smp_reboot_interrupt(void);
__visible void smp_reschedule_interrupt(struct pt_regs *regs);
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 98a57cb4aa86..e5e9fc08f86c 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -79,7 +79,7 @@ void native_send_call_func_single_ipi(int cpu)
__apic_send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR);
}
-void native_send_call_func_ipi(const struct cpumask *mask)
+static void do_native_send_ipi(const struct cpumask *mask, int vector)
{
if (static_branch_likely(&apic_use_ipi_shorthand)) {
unsigned int cpu = smp_processor_id();
@@ -88,14 +88,19 @@ void native_send_call_func_ipi(const struct cpumask *mask)
goto sendmask;
if (cpumask_test_cpu(cpu, mask))
- __apic_send_IPI_all(CALL_FUNCTION_VECTOR);
+ __apic_send_IPI_all(vector);
else if (num_online_cpus() > 1)
- __apic_send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+ __apic_send_IPI_allbutself(vector);
return;
}
sendmask:
- __apic_send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+ __apic_send_IPI_mask(mask, vector);
+}
+
+void native_send_call_func_ipi(const struct cpumask *mask)
+{
+ do_native_send_ipi(mask, CALL_FUNCTION_VECTOR);
}
void apic_send_nmi_to_offline_cpu(unsigned int cpu)
@@ -106,6 +111,16 @@ void apic_send_nmi_to_offline_cpu(unsigned int cpu)
return;
apic->send_IPI(cpu, NMI_VECTOR);
}
+
+void native_send_rar_single_ipi(int cpu)
+{
+ apic->send_IPI_mask(cpumask_of(cpu), RAR_VECTOR);
+}
+
+void native_send_rar_ipi(const struct cpumask *mask)
+{
+ do_native_send_ipi(mask, RAR_VECTOR);
+}
#endif /* CONFIG_SMP */
static inline int __prepare_ICR2(unsigned int mask)
diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
index bdcf609eb283..833669174267 100644
--- a/arch/x86/kernel/apic/local.h
+++ b/arch/x86/kernel/apic/local.h
@@ -38,6 +38,9 @@ static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,
case NMI_VECTOR:
icr |= APIC_DM_NMI;
break;
+ case RAR_VECTOR:
+ icr |= APIC_DM_RAR;
+ break;
}
return icr;
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18266cc3d98c..2c51ed6aaf03 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -297,5 +297,8 @@ struct smp_ops smp_ops = {
.send_call_func_ipi = native_send_call_func_ipi,
.send_call_func_single_ipi = native_send_call_func_single_ipi,
+
+ .send_rar_ipi = native_send_rar_ipi,
+ .send_rar_single_ipi = native_send_rar_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);
--
2.49.0
> diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
> index 0c1c68039d6f..1ab9f5fcac8a 100644
> --- a/arch/x86/include/asm/smp.h
> +++ b/arch/x86/include/asm/smp.h
> @@ -40,6 +40,9 @@ struct smp_ops {
>
> void (*send_call_func_ipi)(const struct cpumask *mask);
> void (*send_call_func_single_ipi)(int cpu);
> +
> + void (*send_rar_ipi)(const struct cpumask *mask);
> + void (*send_rar_single_ipi)(int cpu);
> };
I assume Yu-cheng did it this way.
I'm curious why new smp_ops are needed for this, though. It's not like
there are a bunch of different implementations to pick between.
> -void native_send_call_func_ipi(const struct cpumask *mask)
> +static void do_native_send_ipi(const struct cpumask *mask, int vector)
> {
> if (static_branch_likely(&apic_use_ipi_shorthand)) {
> unsigned int cpu = smp_processor_id();
> @@ -88,14 +88,19 @@ void native_send_call_func_ipi(const struct cpumask *mask)
> goto sendmask;
>
> if (cpumask_test_cpu(cpu, mask))
> - __apic_send_IPI_all(CALL_FUNCTION_VECTOR);
> + __apic_send_IPI_all(vector);
> else if (num_online_cpus() > 1)
> - __apic_send_IPI_allbutself(CALL_FUNCTION_VECTOR);
> + __apic_send_IPI_allbutself(vector);
> return;
> }
>
> sendmask:
> - __apic_send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
> + __apic_send_IPI_mask(mask, vector);
> +}
> +
> +void native_send_call_func_ipi(const struct cpumask *mask)
> +{
> + do_native_send_ipi(mask, CALL_FUNCTION_VECTOR);
> }
This refactoring probably belongs in a separate patch.
> void apic_send_nmi_to_offline_cpu(unsigned int cpu)
> @@ -106,6 +111,16 @@ void apic_send_nmi_to_offline_cpu(unsigned int cpu)
> return;
> apic->send_IPI(cpu, NMI_VECTOR);
> }
> +
> +void native_send_rar_single_ipi(int cpu)
> +{
> + apic->send_IPI_mask(cpumask_of(cpu), RAR_VECTOR);
> +}
> +
> +void native_send_rar_ipi(const struct cpumask *mask)
> +{
> + do_native_send_ipi(mask, RAR_VECTOR);
> +}
> #endif /* CONFIG_SMP */
>
> static inline int __prepare_ICR2(unsigned int mask)
> diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
> index bdcf609eb283..833669174267 100644
> --- a/arch/x86/kernel/apic/local.h
> +++ b/arch/x86/kernel/apic/local.h
> @@ -38,6 +38,9 @@ static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,
> case NMI_VECTOR:
> icr |= APIC_DM_NMI;
> break;
> + case RAR_VECTOR:
> + icr |= APIC_DM_RAR;
> + break;
> }
> return icr;
> }
I feel like this patch is doing three separate things:
1. Adds smp_ops
2. Refactors native_send_call_func_ipi()
3. Adds RAR support
None of those are huge, but it would make a lot more sense to break
those out. I'm also still not sure of the point of the smp_ops.
On Wed, 2025-05-21 at 08:28 -0700, Dave Hansen wrote:
> > diff --git a/arch/x86/include/asm/smp.h
> > b/arch/x86/include/asm/smp.h
> > index 0c1c68039d6f..1ab9f5fcac8a 100644
> > --- a/arch/x86/include/asm/smp.h
> > +++ b/arch/x86/include/asm/smp.h
> > @@ -40,6 +40,9 @@ struct smp_ops {
> >
> > void (*send_call_func_ipi)(const struct cpumask *mask);
> > void (*send_call_func_single_ipi)(int cpu);
> > +
> > + void (*send_rar_ipi)(const struct cpumask *mask);
> > + void (*send_rar_single_ipi)(int cpu);
> > };
>
> I assume Yu-cheng did it this way.
>
> I'm curious why new smp_ops are needed for this, though. It's not like
> there are a bunch of different implementations to pick between.
>
You are right, this was in the code I received.
> I feel like this patch is doing three separate things:
>
> 1. Adds smp_ops
> 2. Refactors native_send_call_func_ipi()
> 3. Adds RAR support
>
> None of those are huge, but it would make a lot more sense to break
> those out. I'm also still not sure of the point of the smp_ops.
>
I am not very familiar with this part of the kernel,
but would be happy to make whatever changes the
maintainers want to see.
--
All Rights Reversed.
* Rik van Riel <riel@surriel.com> wrote: > diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h > index 47051871b436..c417b0015304 100644 > --- a/arch/x86/include/asm/irq_vectors.h > +++ b/arch/x86/include/asm/irq_vectors.h > @@ -103,6 +103,11 @@ > */ > #define POSTED_MSI_NOTIFICATION_VECTOR 0xeb > > +/* > + * RAR (remote action request) TLB flush > + */ > +#define RAR_VECTOR 0xe0 > + > #define NR_VECTORS 256 This subtly breaks x86 IRQ vector allocation AFAICS. Right now device IRQ vectors are allocated from 0x81 to FIRST_SYSTEM_VECTOR (POSTED_MSI_NOTIFICATION_VECTOR) or 0xeb. But RAR_VECTOR is within that range, so the IRQ allocator will overlap it and result in what I guess will be misbehaving RAR code and misbehaving device IRQ handling once it hands out 0xeb as well. So you need to lower NR_EXTERNAL_VECTORS for there to be no overlap between device IRQ vectors and system IRQ vectors. This will substantially compress the available device vector space from ~108 vectors to ~95 vectors, a ~12% reduction. RAR, under the current device IRQ vector allocator, will effectively reduce the number of vectors not by 1 vector, but by 13 vectors. This should be pointed out in the changelog. It probably doesn't matter much due to MSI multiplexing, but should nevertheless be implemented correctly and should be documented. Thanks, Ingo
On Tue, 2025-05-20 at 11:16 +0200, Ingo Molnar wrote: > > * Rik van Riel <riel@surriel.com> wrote: > > > diff --git a/arch/x86/include/asm/irq_vectors.h > > b/arch/x86/include/asm/irq_vectors.h > > index 47051871b436..c417b0015304 100644 > > --- a/arch/x86/include/asm/irq_vectors.h > > +++ b/arch/x86/include/asm/irq_vectors.h > > @@ -103,6 +103,11 @@ > > */ > > #define POSTED_MSI_NOTIFICATION_VECTOR 0xeb > > > > +/* > > + * RAR (remote action request) TLB flush > > + */ > > +#define RAR_VECTOR 0xe0 > > + > > #define NR_VECTORS 256 > > This subtly breaks x86 IRQ vector allocation AFAICS. > > Right now device IRQ vectors are allocated from 0x81 to > FIRST_SYSTEM_VECTOR (POSTED_MSI_NOTIFICATION_VECTOR) or 0xeb. > > But RAR_VECTOR is within that range, so the IRQ allocator will > overlap > it and result in what I guess will be misbehaving RAR code and > misbehaving device IRQ handling once it hands out 0xeb as well. Sure enough! After fixing this issue, the nearly instant segfaults for programs using RAR are no longer happening. I'll let it run tests overnight, and will hopefully be able to post a reliable v3 tomorrow. Thank you! -- All Rights Reversed.
© 2016 - 2025 Red Hat, Inc.