[PATCH v13 07/14] x86/mm: global ASID allocation helper functions

Posted by Rik van Riel 9 months, 3 weeks ago
Functions to manage global ASID space. Multithreaded processes that
are simultaneously active on 4 or more CPUs can get a global ASID,
resulting in the same PCID being used for that process on every CPU.

This in turn will allow the kernel to use hardware-assisted TLB flushing
through AMD INVLPGB or Intel RAR for these processes.

Helper functions split out by request.
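
As a sketch of how these helpers are expected to fit together once a
later patch adds the actual caller (illustration only, not part of this
patch; the function name and threshold below are placeholders):

	static void consider_global_asid(struct mm_struct *mm)
	{
		if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
			return;

		/* Upgrade the mm if it is active on 4 or more CPUs. */
		if (mm_active_cpus_exceeds(mm, 3))
			use_global_asid(mm);
	}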

Signed-off-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
---
 arch/x86/include/asm/mmu.h      |  11 +++
 arch/x86/include/asm/tlbflush.h |  43 ++++++++++
 arch/x86/mm/tlb.c               | 144 +++++++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 3b496cdcb74b..edb5942d4829 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -69,6 +69,17 @@ typedef struct {
 	u16 pkey_allocation_map;
 	s16 execute_only_pkey;
 #endif
+
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+	/*
+	 * The global ASID will be a non-zero value when the process has
+	 * the same ASID across all CPUs, allowing it to make use of
+	 * hardware-assisted remote TLB invalidation like AMD INVLPGB.
+	 */
+	u16 global_asid;
+	/* The process is transitioning to a new global ASID number. */
+	bool asid_transition;
+#endif
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(mm)						\
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 09463a2fb05f..83f1da2f1e4a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -6,6 +6,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/sched.h>
 
+#include <asm/barrier.h>
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
@@ -234,6 +235,48 @@ void flush_tlb_one_kernel(unsigned long addr);
 void flush_tlb_multi(const struct cpumask *cpumask,
 		      const struct flush_tlb_info *info);
 
+static inline bool is_dyn_asid(u16 asid)
+{
+	return asid < TLB_NR_DYN_ASIDS;
+}
+
+#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
+static inline u16 mm_global_asid(struct mm_struct *mm)
+{
+	u16 asid;
+
+	if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+		return 0;
+
+	asid = smp_load_acquire(&mm->context.global_asid);
+
+	/* mm->context.global_asid is either 0, or a global ASID */
+	VM_WARN_ON_ONCE(asid && is_dyn_asid(asid));
+
+	return asid;
+}
+
+static inline void assign_mm_global_asid(struct mm_struct *mm, u16 asid)
+{
+	/*
+	 * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() ->
+	 * finish_asid_transition() needs to observe asid_transition = true
+	 * once it observes global_asid.
+	 */
+	mm->context.asid_transition = true;
+	smp_store_release(&mm->context.global_asid, asid);
+}
+#else
+static inline u16 mm_global_asid(struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void assign_mm_global_asid(struct mm_struct *mm, u16 asid)
+{
+}
+#endif
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #endif
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 16839651f67f..405630479b90 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -74,13 +74,15 @@
  * use different names for each of them:
  *
  * ASID  - [0, TLB_NR_DYN_ASIDS-1]
- *         the canonical identifier for an mm
+ *         the canonical identifier for an mm, dynamically allocated on each CPU
+ *         [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1]
+ *         the canonical, global identifier for an mm, identical across all CPUs
  *
- * kPCID - [1, TLB_NR_DYN_ASIDS]
+ * kPCID - [1, MAX_ASID_AVAILABLE]
  *         the value we write into the PCID part of CR3; corresponds to the
  *         ASID+1, because PCID 0 is special.
  *
- * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE]
  *         for KPTI each mm has two address spaces and thus needs two
  *         PCID values, but we can still do with a single ASID denomination
  *         for each mm. Corresponds to kPCID + 2048.
@@ -251,6 +253,142 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
 	*need_flush = true;
 }
 
+/*
+ * Global ASIDs are allocated for multi-threaded processes that are
+ * active on multiple CPUs simultaneously, giving each of those
+ * processes the same PCIDs on every CPU, for use with hardware-assisted
+ * TLB shootdown on remote CPUs, like AMD INVLPGB or Intel RAR.
+ *
+ * These global ASIDs are held for the lifetime of the process.
+ */
+static DEFINE_RAW_SPINLOCK(global_asid_lock);
+static u16 last_global_asid = MAX_ASID_AVAILABLE;
+static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE);
+static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE);
+static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
+
+/*
+ * When the search for a free ASID in the global ASID space reaches
+ * MAX_ASID_AVAILABLE, a global TLB flush guarantees that previously
+ * freed global ASIDs are safe to re-use.
+ *
+ * This way the global flush only needs to happen at ASID rollover
+ * time, and not at ASID allocation time.
+ */
+static void reset_global_asid_space(void)
+{
+	lockdep_assert_held(&global_asid_lock);
+
+	invlpgb_flush_all_nonglobals();
+
+	/*
+	 * The TLB flush above makes it safe to re-use the previously
+	 * freed global ASIDs.
+	 */
+	bitmap_andnot(global_asid_used, global_asid_used,
+			global_asid_freed, MAX_ASID_AVAILABLE);
+	bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE);
+
+	/* Restart the search from the start of global ASID space. */
+	last_global_asid = TLB_NR_DYN_ASIDS;
+}
+
+static u16 allocate_global_asid(void)
+{
+	u16 asid;
+
+	lockdep_assert_held(&global_asid_lock);
+
+	/* The previous allocation hit the edge of available address space */
+	if (last_global_asid >= MAX_ASID_AVAILABLE - 1)
+		reset_global_asid_space();
+
+	asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, last_global_asid);
+
+	if (asid >= MAX_ASID_AVAILABLE) {
+		/* This should never happen. */
+		VM_WARN_ONCE(1, "Unable to allocate global ASID despite %d available\n",
+				global_asid_available);
+		return 0;
+	}
+
+	/* Claim this global ASID. */
+	__set_bit(asid, global_asid_used);
+	last_global_asid = asid;
+	global_asid_available--;
+	return asid;
+}
+
+/*
+ * Check whether a process is currently active on more than "threshold" CPUs.
+ * This is a cheap estimation on whether or not it may make sense to assign
+ * a global ASID to this process, and use broadcast TLB invalidation.
+ */
+static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold)
+{
+	int count = 0;
+	int cpu;
+
+	/* This quick check should eliminate most single threaded programs. */
+	if (cpumask_weight(mm_cpumask(mm)) <= threshold)
+		return false;
+
+	/* Slower check to make sure. */
+	for_each_cpu(cpu, mm_cpumask(mm)) {
+		/* Skip the CPUs that aren't really running this process. */
+		if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm)
+			continue;
+
+		if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
+			continue;
+
+		if (++count > threshold)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Assign a global ASID to the current process, protecting against
+ * races between multiple threads in the process.
+ */
+static void use_global_asid(struct mm_struct *mm)
+{
+	u16 asid;
+
+	guard(raw_spinlock_irqsave)(&global_asid_lock);
+
+	/* This process is already using broadcast TLB invalidation. */
+	if (mm_global_asid(mm))
+		return;
+
+	/* The last global ASID was consumed while waiting for the lock. */
+	if (!global_asid_available) {
+		VM_WARN_ONCE(1, "Ran out of global ASIDs\n");
+		return;
+	}
+
+	asid = allocate_global_asid();
+	if (!asid)
+		return;
+
+	assign_mm_global_asid(mm, asid);
+}
+
+void destroy_context_free_global_asid(struct mm_struct *mm)
+{
+	if (!mm_global_asid(mm))
+		return;
+
+	guard(raw_spinlock_irqsave)(&global_asid_lock);
+
+	/* The global ASID can be re-used only after flush at wrap-around. */
+	__set_bit(mm->context.global_asid, global_asid_freed);
+
+	mm->context.global_asid = 0;
+	global_asid_available++;
+}
+
 /*
  * Given an ASID, flush the corresponding user ASID.  We can delay this
  * until the next time we switch to it.
-- 
2.47.1
Re: [PATCH v13 07/14] x86/mm: global ASID allocation helper functions
Posted by Borislav Petkov 9 months, 3 weeks ago
On Sun, Feb 23, 2025 at 02:48:57PM -0500, Rik van Riel wrote:
> Subject: Re: [PATCH v13 07/14] x86/mm: global ASID allocation helper functions

This subject needs a verb. I.e.,

"Add global ..."

> Functions to manage global ASID space. 

Ditto.

> Multithreaded processes that
> are simultaneously active on 4 or more CPUs can get a global ASID,
> resulting in the same PCID being used for that process on every CPU.
> 
> This in turn will allow the kernel to use hardware-assisted TLB flushing
> through AMD INVLPGB or Intel RAR for these processes.
> 
> Helper functions split out by request.

No need for that sentence.

> Signed-off-by: Rik van Riel <riel@surriel.com>
> Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
> Tested-by: Manali Shukla <Manali.Shukla@amd.com>
> Tested-by: Brendan Jackman <jackmanb@google.com>
> Tested-by: Michael Kelley <mhklinux@outlook.com>
> ---
>  arch/x86/include/asm/mmu.h      |  11 +++
>  arch/x86/include/asm/tlbflush.h |  43 ++++++++++
>  arch/x86/mm/tlb.c               | 144 +++++++++++++++++++++++++++++++-
>  3 files changed, 195 insertions(+), 3 deletions(-)

arch/x86/mm/tlb.c:378:6: warning: no previous prototype for ‘destroy_context_free_global_asid’ [-Wmissing-prototypes]
  378 | void destroy_context_free_global_asid(struct mm_struct *mm)
      |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/mm/tlb.c:355:13: warning: ‘use_global_asid’ defined but not used [-Wunused-function]
  355 | static void use_global_asid(struct mm_struct *mm)
      |             ^~~~~~~~~~~~~~~
arch/x86/mm/tlb.c:327:13: warning: ‘mm_active_cpus_exceeds’ defined but not used [-Wunused-function]
  327 | static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold)
      |             ^~~~~~~~~~~~~~~~~~~~~~

If those functions are going to remain global they better get a proper prefix
like "tlb_".

And add the functions with their first use - no need to pre-add them.
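
(FWIW, the -Wmissing-prototypes warning goes away once the function has
a declaration in a header, i.e. something like

	void destroy_context_free_global_asid(struct mm_struct *mm);

in tlbflush.h - under whatever name it ends up with.)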

> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index 3b496cdcb74b..edb5942d4829 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -69,6 +69,17 @@ typedef struct {
>  	u16 pkey_allocation_map;
>  	s16 execute_only_pkey;
>  #endif
> +
> +#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
> +	/*
> +	 * The global ASID will be a non-zero value when the process has
> +	 * the same ASID across all CPUs, allowing it to make use of
> +	 * hardware-assisted remote TLB invalidation like AMD INVLPGB.
> +	 */
> +	u16 global_asid;
> +	/* The process is transitioning to a new global ASID number. */
> +	bool asid_transition;
> +#endif
>  } mm_context_t;
>  
>  #define INIT_MM_CONTEXT(mm)						\
> diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
> index 09463a2fb05f..83f1da2f1e4a 100644
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -6,6 +6,7 @@
>  #include <linux/mmu_notifier.h>
>  #include <linux/sched.h>
>  
> +#include <asm/barrier.h>
>  #include <asm/processor.h>
>  #include <asm/cpufeature.h>
>  #include <asm/special_insns.h>
> @@ -234,6 +235,48 @@ void flush_tlb_one_kernel(unsigned long addr);
>  void flush_tlb_multi(const struct cpumask *cpumask,
>  		      const struct flush_tlb_info *info);
>  
> +static inline bool is_dyn_asid(u16 asid)
> +{
> +	return asid < TLB_NR_DYN_ASIDS;
> +}
> +
> +#ifdef CONFIG_X86_BROADCAST_TLB_FLUSH
> +static inline u16 mm_global_asid(struct mm_struct *mm)
> +{
> +	u16 asid;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
> +		return 0;
> +
> +	asid = smp_load_acquire(&mm->context.global_asid);
> +
> +	/* mm->context.global_asid is either 0, or a global ASID */
> +	VM_WARN_ON_ONCE(asid && is_dyn_asid(asid));
> +
> +	return asid;
> +}
> +
> +static inline void assign_mm_global_asid(struct mm_struct *mm, u16 asid)

mm_assign_global_asid()

> +{
> +	/*
> +	 * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() ->
> +	 * finish_asid_transition() needs to observe asid_transition = true
> +	 * once it observes global_asid.
> +	 */
> +	mm->context.asid_transition = true;
> +	smp_store_release(&mm->context.global_asid, asid);
> +}
> +#else
> +static inline u16 mm_global_asid(struct mm_struct *mm)
> +{
> +	return 0;
> +}
> +
> +static inline void assign_mm_global_asid(struct mm_struct *mm, u16 asid)
> +{
> +}
> +#endif
> +
>  #ifdef CONFIG_PARAVIRT
>  #include <asm/paravirt.h>
>  #endif
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index 16839651f67f..405630479b90 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -74,13 +74,15 @@
>   * use different names for each of them:
>   *
>   * ASID  - [0, TLB_NR_DYN_ASIDS-1]
> - *         the canonical identifier for an mm
> + *         the canonical identifier for an mm, dynamically allocated on each CPU
> + *         [TLB_NR_DYN_ASIDS, MAX_ASID_AVAILABLE-1]
> + *         the canonical, global identifier for an mm, identical across all CPUs
>   *
> - * kPCID - [1, TLB_NR_DYN_ASIDS]
> + * kPCID - [1, MAX_ASID_AVAILABLE]
>   *         the value we write into the PCID part of CR3; corresponds to the
>   *         ASID+1, because PCID 0 is special.
>   *
> - * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
> + * uPCID - [2048 + 1, 2048 + MAX_ASID_AVAILABLE]
>   *         for KPTI each mm has two address spaces and thus needs two
>   *         PCID values, but we can still do with a single ASID denomination
>   *         for each mm. Corresponds to kPCID + 2048.
> @@ -251,6 +253,142 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
>  	*need_flush = true;
>  }
>  
> +/*
> + * Global ASIDs are allocated for multi-threaded processes that are
> + * active on multiple CPUs simultaneously, giving each of those
> + * processes the same PCIDs on every CPU, for use with hardware-assisted

"the same PCID" or "the same PCIDs"?

Does a multithreaded process get one or more than one PCID? I'd expect only
one ofc.

> + * TLB shootdown on remote CPUs, like AMD INVLPGB or Intel RAR.
> + *
> + * These global ASIDs are held for the lifetime of the process.
> + */
> +static DEFINE_RAW_SPINLOCK(global_asid_lock);
> +static u16 last_global_asid = MAX_ASID_AVAILABLE;
> +static DECLARE_BITMAP(global_asid_used, MAX_ASID_AVAILABLE);
> +static DECLARE_BITMAP(global_asid_freed, MAX_ASID_AVAILABLE);
> +static int global_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
> +
> +/*
> + * When the search for a free ASID in the global ASID space reaches
> + * MAX_ASID_AVAILABLE, a global TLB flush guarantees that previously
> + * freed global ASIDs are safe to re-use.
> + *
> + * This way the global flush only needs to happen at ASID rollover
> + * time, and not at ASID allocation time.
> + */
> +static void reset_global_asid_space(void)
> +{
> +	lockdep_assert_held(&global_asid_lock);
> +
> +	invlpgb_flush_all_nonglobals();
> +
> +	/*
> +	 * The TLB flush above makes it safe to re-use the previously
> +	 * freed global ASIDs.
> +	 */
> +	bitmap_andnot(global_asid_used, global_asid_used,
> +			global_asid_freed, MAX_ASID_AVAILABLE);
> +	bitmap_clear(global_asid_freed, 0, MAX_ASID_AVAILABLE);
> +
> +	/* Restart the search from the start of global ASID space. */
> +	last_global_asid = TLB_NR_DYN_ASIDS;
> +}
> +
> +static u16 allocate_global_asid(void)
> +{
> +	u16 asid;
> +
> +	lockdep_assert_held(&global_asid_lock);
> +
> +	/* The previous allocation hit the edge of available address space */
> +	if (last_global_asid >= MAX_ASID_AVAILABLE - 1)
> +		reset_global_asid_space();
> +
> +	asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, last_global_asid);
> +
> +	if (asid >= MAX_ASID_AVAILABLE) {

	if (asid >= MAX_ASID_AVAILABLE && !global_asid_available)

> +		/* This should never happen. */
> +		VM_WARN_ONCE(1, "Unable to allocate global ASID despite %d available\n",
> +				global_asid_available);
> +		return 0;
> +	}
> +
> +	/* Claim this global ASID. */
> +	__set_bit(asid, global_asid_used);
> +	last_global_asid = asid;
> +	global_asid_available--;
> +	return asid;
> +}
> +
> +/*
> + * Check whether a process is currently active on more than "threshold" CPUs.

@threshold - then it is clear that you mean the function argument.

> + * This is a cheap estimation on whether or not it may make sense to assign
> + * a global ASID to this process, and use broadcast TLB invalidation.
> + */
> +static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold)
> +{
> +	int count = 0;
> +	int cpu;

Is this function supposed to hold some sort of a lock?

> +
> +	/* This quick check should eliminate most single threaded programs. */
> +	if (cpumask_weight(mm_cpumask(mm)) <= threshold)
> +		return false;
> +
> +	/* Slower check to make sure. */
> +	for_each_cpu(cpu, mm_cpumask(mm)) {

Needs CPU hotplug protection?

> +		/* Skip the CPUs that aren't really running this process. */
> +		if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm)
> +			continue;
> +
> +		if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
> +			continue;
> +
> +		if (++count > threshold)
> +			return true;
> +	}
> +	return false;
> +}
> +
> +/*
> + * Assign a global ASID to the current process, protecting against
> + * races between multiple threads in the process.
> + */
> +static void use_global_asid(struct mm_struct *mm)
> +{
> +	u16 asid;
> +
> +	guard(raw_spinlock_irqsave)(&global_asid_lock);
> +
> +	/* This process is already using broadcast TLB invalidation. */
> +	if (mm_global_asid(mm))
> +		return;
> +
> +	/* The last global ASID was consumed while waiting for the lock. */
> +	if (!global_asid_available) {
> +		VM_WARN_ONCE(1, "Ran out of global ASIDs\n");

And? Why do we want to warn here? We need to reset global ASIDs anyway.

> +		return;
> +	}
> +
> +	asid = allocate_global_asid();
> +	if (!asid)
> +		return;
> +
> +	assign_mm_global_asid(mm, asid);
> +}
> +
> +void destroy_context_free_global_asid(struct mm_struct *mm)

That name is a mouthful. mm_free_global_asid() is just fine.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v13 07/14] x86/mm: global ASID allocation helper functions
Posted by Rik van Riel 9 months, 3 weeks ago
On Mon, 2025-02-24 at 15:16 +0100, Borislav Petkov wrote:
> On Sun, Feb 23, 2025 at 02:48:57PM -0500, Rik van Riel wrote:
> > 
> >  arch/x86/include/asm/mmu.h      |  11 +++
> >  arch/x86/include/asm/tlbflush.h |  43 ++++++++++
> >  arch/x86/mm/tlb.c               | 144 +++++++++++++++++++++++++++++++-
> >  3 files changed, 195 insertions(+), 3 deletions(-)
> 
> arch/x86/mm/tlb.c:378:6: warning: no previous prototype for ‘destroy_context_free_global_asid’ [-Wmissing-prototypes]
>   378 | void destroy_context_free_global_asid(struct mm_struct *mm)
>       |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> arch/x86/mm/tlb.c:355:13: warning: ‘use_global_asid’ defined but not used [-Wunused-function]
>   355 | static void use_global_asid(struct mm_struct *mm)
>       |             ^~~~~~~~~~~~~~~
> arch/x86/mm/tlb.c:327:13: warning: ‘mm_active_cpus_exceeds’ defined but not used [-Wunused-function]
>   327 | static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold)
>       |             ^~~~~~~~~~~~~~~~~~~~~~
> 
> If those functions are going to remain global they better get a proper
> prefix like "tlb_".
> 
I've renamed the global function to tlb_destroy_context_free_global_asid.

> And add the functions with their first use - no need to pre-add them.

That's what I originally had. Dave requested I split
out the patch into multiple ones.

That means either introducing helper functions in a
separate patch, or coming up with one version of the
code in one patch, and then replacing that code in
the next, resulting in a bunch of extra code to review.

https://lore.kernel.org/linux-kernel/b067a9fc-ff5f-4baa-a1ff-3fa749ae4d44@intel.com/
> 
> > 
> > +static inline void assign_mm_global_asid(struct mm_struct *mm, u16 asid)
> 
> mm_assign_global_asid()

Done.

> > +/*
> > + * Global ASIDs are allocated for multi-threaded processes that are
> > + * active on multiple CPUs simultaneously, giving each of those
> > + * processes the same PCIDs on every CPU, for use with hardware-assisted
> 
> "the same PCID" or "the same PCIDs"?
> 
> Does a multithreaded process get one or more than one PCID? I'd expect
> only one ofc.

It's only one. Fixed the comment.
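
That line of the comment now reads:

 * processes the same PCID on every CPU, for use with hardware-assisted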

> 
> > +	asid = find_next_zero_bit(global_asid_used, MAX_ASID_AVAILABLE, last_global_asid);
> > +
> > +	if (asid >= MAX_ASID_AVAILABLE) {
> 
> 	if (asid >= MAX_ASID_AVAILABLE && !global_asid_available)

Done.
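
I.e., the bail-out path is now:

	if (asid >= MAX_ASID_AVAILABLE && !global_asid_available) {
		/* This should never happen. */
		VM_WARN_ONCE(1, "Unable to allocate global ASID despite %d available\n",
				global_asid_available);
		return 0;
	}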

> > +/*
> > + * Check whether a process is currently active on more than "threshold" CPUs.
> 
> @threshold - then it is clear that you mean the function argument.
> 
Done. Thank you.

> > + * This is a cheap estimation on whether or not it may make sense to assign
> > + * a global ASID to this process, and use broadcast TLB invalidation.
> > + */
> > +static bool mm_active_cpus_exceeds(struct mm_struct *mm, int threshold)
> > +{
> > +	int count = 0;
> > +	int cpu;
> 
> Is this function supposed to hold some sort of a lock?

I don't think we care too much about total accuracy here.

Not holding up other CPUs is probably more important.

> 
> > +
> > +	/* This quick check should eliminate most single threaded programs. */
> > +	if (cpumask_weight(mm_cpumask(mm)) <= threshold)
> > +		return false;
> > +
> > +	/* Slower check to make sure. */
> > +	for_each_cpu(cpu, mm_cpumask(mm)) {
> 
> Needs CPU hotplug protection?

I don't see CPU hotplug protection in most other loops
that iterate over CPUs and do nothing but read some
per-CPU data.

What are we trying to protect against?

What kind of protection do we need?

> 
> > +	/* The last global ASID was consumed while waiting for the lock. */
> > +	if (!global_asid_available) {
> > +		VM_WARN_ONCE(1, "Ran out of global ASIDs\n");
> 
> And? Why do we want to warn here? We need to reset global ASIDs
> anyway.

This warning prints if we run out of global ASIDs,
but have more processes in the system that want one.

This basically helps us figure out whether or not
we should bother implementing more aggressive ASID
re-use (like ARM and RISCV have), which might
require us to figure out how to make that re-use
more scalable than it is today.

> 
> > +		return;
> > +	}
> > +
> > +	asid = allocate_global_asid();
> > +	if (!asid)
> > +		return;
> > +
> > +	assign_mm_global_asid(mm, asid);
> > +}
> > +
> > +void destroy_context_free_global_asid(struct mm_struct *mm)
> 
> That name is a mouthful. mm_free_global_asid() is just fine.
> 
Done.
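
It's now:

	void mm_free_global_asid(struct mm_struct *mm)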

-- 
All Rights Reversed.