From: Rik van Riel <riel@fb.com>
RAR TLB flushing is started by sending a command to the APIC.
This patch adds Remote Action Request commands.
[riel: move some things around to account for 6 years of changes]
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
---
arch/x86/include/asm/apicdef.h | 1 +
arch/x86/include/asm/irq_vectors.h | 5 +++++
arch/x86/include/asm/smp.h | 15 +++++++++++++++
arch/x86/kernel/apic/ipi.c | 23 +++++++++++++++++++----
arch/x86/kernel/apic/local.h | 3 +++
arch/x86/kernel/smp.c | 3 +++
6 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 094106b6a538..b152d45af91a 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -92,6 +92,7 @@
#define APIC_DM_LOWEST 0x00100
#define APIC_DM_SMI 0x00200
#define APIC_DM_REMRD 0x00300
+#define APIC_DM_RAR 0x00300
#define APIC_DM_NMI 0x00400
#define APIC_DM_INIT 0x00500
#define APIC_DM_STARTUP 0x00600
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 47051871b436..c417b0015304 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -103,6 +103,11 @@
*/
#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb
+/*
+ * RAR (remote action request) TLB flush
+ */
+#define RAR_VECTOR 0xe0
+
#define NR_VECTORS 256
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0c1c68039d6f..1ab9f5fcac8a 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -40,6 +40,9 @@ struct smp_ops {
void (*send_call_func_ipi)(const struct cpumask *mask);
void (*send_call_func_single_ipi)(int cpu);
+
+ void (*send_rar_ipi)(const struct cpumask *mask);
+ void (*send_rar_single_ipi)(int cpu);
};
/* Globals due to paravirt */
@@ -100,6 +103,16 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
smp_ops.send_call_func_ipi(mask);
}
+static inline void arch_send_rar_single_ipi(int cpu)
+{
+ smp_ops.send_rar_single_ipi(cpu);
+}
+
+static inline void arch_send_rar_ipi_mask(const struct cpumask *mask)
+{
+ smp_ops.send_rar_ipi(mask);
+}
+
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void smp_prepare_cpus_common(void);
@@ -120,6 +133,8 @@ void __noreturn mwait_play_dead(unsigned int eax_hint);
void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
+void native_send_rar_ipi(const struct cpumask *mask);
+void native_send_rar_single_ipi(int cpu);
asmlinkage __visible void smp_reboot_interrupt(void);
__visible void smp_reschedule_interrupt(struct pt_regs *regs);
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 98a57cb4aa86..e5e9fc08f86c 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -79,7 +79,7 @@ void native_send_call_func_single_ipi(int cpu)
__apic_send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR);
}
-void native_send_call_func_ipi(const struct cpumask *mask)
+static void do_native_send_ipi(const struct cpumask *mask, int vector)
{
if (static_branch_likely(&apic_use_ipi_shorthand)) {
unsigned int cpu = smp_processor_id();
@@ -88,14 +88,19 @@ void native_send_call_func_ipi(const struct cpumask *mask)
goto sendmask;
if (cpumask_test_cpu(cpu, mask))
- __apic_send_IPI_all(CALL_FUNCTION_VECTOR);
+ __apic_send_IPI_all(vector);
else if (num_online_cpus() > 1)
- __apic_send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+ __apic_send_IPI_allbutself(vector);
return;
}
sendmask:
- __apic_send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+ __apic_send_IPI_mask(mask, vector);
+}
+
+void native_send_call_func_ipi(const struct cpumask *mask)
+{
+ do_native_send_ipi(mask, CALL_FUNCTION_VECTOR);
}
void apic_send_nmi_to_offline_cpu(unsigned int cpu)
@@ -106,6 +111,16 @@ void apic_send_nmi_to_offline_cpu(unsigned int cpu)
return;
apic->send_IPI(cpu, NMI_VECTOR);
}
+
+void native_send_rar_single_ipi(int cpu)
+{
+ apic->send_IPI_mask(cpumask_of(cpu), RAR_VECTOR);
+}
+
+void native_send_rar_ipi(const struct cpumask *mask)
+{
+ do_native_send_ipi(mask, RAR_VECTOR);
+}
#endif /* CONFIG_SMP */
static inline int __prepare_ICR2(unsigned int mask)
diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
index bdcf609eb283..833669174267 100644
--- a/arch/x86/kernel/apic/local.h
+++ b/arch/x86/kernel/apic/local.h
@@ -38,6 +38,9 @@ static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,
case NMI_VECTOR:
icr |= APIC_DM_NMI;
break;
+ case RAR_VECTOR:
+ icr |= APIC_DM_RAR;
+ break;
}
return icr;
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18266cc3d98c..2c51ed6aaf03 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -297,5 +297,8 @@ struct smp_ops smp_ops = {
.send_call_func_ipi = native_send_call_func_ipi,
.send_call_func_single_ipi = native_send_call_func_single_ipi,
+
+ .send_rar_ipi = native_send_rar_ipi,
+ .send_rar_single_ipi = native_send_rar_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);
--
2.49.0
> diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
> index 0c1c68039d6f..1ab9f5fcac8a 100644
> --- a/arch/x86/include/asm/smp.h
> +++ b/arch/x86/include/asm/smp.h
> @@ -40,6 +40,9 @@ struct smp_ops {
>
> void (*send_call_func_ipi)(const struct cpumask *mask);
> void (*send_call_func_single_ipi)(int cpu);
> +
> + void (*send_rar_ipi)(const struct cpumask *mask);
> + void (*send_rar_single_ipi)(int cpu);
> };
I assume Yu-cheng did it this way.
I'm curious why new smp_ops are needed for this, though. It's not like
there are a bunch of different implementations to pick between.
> -void native_send_call_func_ipi(const struct cpumask *mask)
> +static void do_native_send_ipi(const struct cpumask *mask, int vector)
> {
> if (static_branch_likely(&apic_use_ipi_shorthand)) {
> unsigned int cpu = smp_processor_id();
> @@ -88,14 +88,19 @@ void native_send_call_func_ipi(const struct cpumask *mask)
> goto sendmask;
>
> if (cpumask_test_cpu(cpu, mask))
> - __apic_send_IPI_all(CALL_FUNCTION_VECTOR);
> + __apic_send_IPI_all(vector);
> else if (num_online_cpus() > 1)
> - __apic_send_IPI_allbutself(CALL_FUNCTION_VECTOR);
> + __apic_send_IPI_allbutself(vector);
> return;
> }
>
> sendmask:
> - __apic_send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
> + __apic_send_IPI_mask(mask, vector);
> +}
> +
> +void native_send_call_func_ipi(const struct cpumask *mask)
> +{
> + do_native_send_ipi(mask, CALL_FUNCTION_VECTOR);
> }
This refactoring probably belongs in a separate patch.
> void apic_send_nmi_to_offline_cpu(unsigned int cpu)
> @@ -106,6 +111,16 @@ void apic_send_nmi_to_offline_cpu(unsigned int cpu)
> return;
> apic->send_IPI(cpu, NMI_VECTOR);
> }
> +
> +void native_send_rar_single_ipi(int cpu)
> +{
> + apic->send_IPI_mask(cpumask_of(cpu), RAR_VECTOR);
> +}
> +
> +void native_send_rar_ipi(const struct cpumask *mask)
> +{
> + do_native_send_ipi(mask, RAR_VECTOR);
> +}
> #endif /* CONFIG_SMP */
>
> static inline int __prepare_ICR2(unsigned int mask)
> diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
> index bdcf609eb283..833669174267 100644
> --- a/arch/x86/kernel/apic/local.h
> +++ b/arch/x86/kernel/apic/local.h
> @@ -38,6 +38,9 @@ static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,
> case NMI_VECTOR:
> icr |= APIC_DM_NMI;
> break;
> + case RAR_VECTOR:
> + icr |= APIC_DM_RAR;
> + break;
> }
> return icr;
> }
I feel like this patch is doing three separate things:
1. Adds smp_ops
2. Refactors native_send_call_func_ipi()
3. Adds RAR support
None of those are huge, but it would make a lot more sense to break
those out. I'm also still not sure of the point of the smp_ops.
On Wed, 2025-05-21 at 08:28 -0700, Dave Hansen wrote:
> > diff --git a/arch/x86/include/asm/smp.h
> > b/arch/x86/include/asm/smp.h
> > index 0c1c68039d6f..1ab9f5fcac8a 100644
> > --- a/arch/x86/include/asm/smp.h
> > +++ b/arch/x86/include/asm/smp.h
> > @@ -40,6 +40,9 @@ struct smp_ops {
> >
> > void (*send_call_func_ipi)(const struct cpumask *mask);
> > void (*send_call_func_single_ipi)(int cpu);
> > +
> > + void (*send_rar_ipi)(const struct cpumask *mask);
> > + void (*send_rar_single_ipi)(int cpu);
> > };
>
> I assume Yu-cheng did it this way.
>
> I'm curious why new smp_ops are needed for this, though. It's not like
> there are a bunch of different implementations to pick between.
>
You are right, this was in the code I received.
> I feel like this patch is doing three separate things:
>
> 1. Adds smp_ops
> 2. Refactors native_send_call_func_ipi()
> 3. Adds RAR support
>
> None of those are huge, but it would make a lot more sense to break
> those out. I'm also still not sure of the point of the smp_ops.
>
I am not very familiar with this part of the kernel,
but would be happy to make whatever changes the
maintainers want to see.
--
All Rights Reversed.
* Rik van Riel <riel@surriel.com> wrote: > diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h > index 47051871b436..c417b0015304 100644 > --- a/arch/x86/include/asm/irq_vectors.h > +++ b/arch/x86/include/asm/irq_vectors.h > @@ -103,6 +103,11 @@ > */ > #define POSTED_MSI_NOTIFICATION_VECTOR 0xeb > > +/* > + * RAR (remote action request) TLB flush > + */ > +#define RAR_VECTOR 0xe0 > + > #define NR_VECTORS 256 This subtly breaks x86 IRQ vector allocation AFAICS. Right now device IRQ vectors are allocated from 0x81 to FIRST_SYSTEM_VECTOR (POSTED_MSI_NOTIFICATION_VECTOR) or 0xeb. But RAR_VECTOR is within that range, so the IRQ allocator will overlap it and result in what I guess will be misbehaving RAR code and misbehaving device IRQ handling once it hands out 0xeb as well. So you need to lower NR_EXTERNAL_VECTORS for there to be no overlap between device IRQ vectors and system IRQ vectors. This will substantially compress the available device vector space from ~108 vectors to ~95 vectors, a ~12% reduction. RAR, under the current device IRQ vector allocator, will effectively reduce the number of vectors not by 1 vector, but by 13 vectors. This should be pointed out in the changelog. It probably doesn't matter much due to MSI multiplexing, but should nevertheless be implemented correctly and should be documented. Thanks, Ingo
On Tue, 2025-05-20 at 11:16 +0200, Ingo Molnar wrote: > > * Rik van Riel <riel@surriel.com> wrote: > > > diff --git a/arch/x86/include/asm/irq_vectors.h > > b/arch/x86/include/asm/irq_vectors.h > > index 47051871b436..c417b0015304 100644 > > --- a/arch/x86/include/asm/irq_vectors.h > > +++ b/arch/x86/include/asm/irq_vectors.h > > @@ -103,6 +103,11 @@ > > */ > > #define POSTED_MSI_NOTIFICATION_VECTOR 0xeb > > > > +/* > > + * RAR (remote action request) TLB flush > > + */ > > +#define RAR_VECTOR 0xe0 > > + > > #define NR_VECTORS 256 > > This subtly breaks x86 IRQ vector allocation AFAICS. > > Right now device IRQ vectors are allocated from 0x81 to > FIRST_SYSTEM_VECTOR (POSTED_MSI_NOTIFICATION_VECTOR) or 0xeb. > > But RAR_VECTOR is within that range, so the IRQ allocator will > overlap > it and result in what I guess will be misbehaving RAR code and > misbehaving device IRQ handling once it hands out 0xeb as well. Sure enough! After fixing this issue, the nearly instant segfaults for programs using RAR are no longer happening. I'll let it run tests overnight, and will hopefully be able to post a reliable v3 tomorrow. Thank you! -- All Rights Reversed.
© 2016 - 2025 Red Hat, Inc.