[RESEND RFC PATCH v2 06/29] cpumask: Introduce for_each_cpu_and_wrap() and bitfield helpers

K Prateek Nayak posted 29 patches 1 week, 4 days ago
Only 28 patches received!
[RESEND RFC PATCH v2 06/29] cpumask: Introduce for_each_cpu_and_wrap() and bitfield helpers
Posted by K Prateek Nayak 1 week, 4 days ago
There is a developing pattern of using cpumask_and() followed by
for_each_cpu_wrap() in the scheduler.

To avoid a temporary variable and needlessly iterating over the cpumask
twice - once for the cpumask_and() operation and the second in the
for_each_cpu_wrap() loop - introduce a new for_each_cpu_and_wrap()
helper to iterate over the common bits set on two bitfields starting at
a specific position without needing an intermediate cpumask variable.

Cc: Yury Norov <yury.norov@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
 include/linux/cpumask.h | 20 ++++++++++++++++++++
 include/linux/find.h    | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index ff8f41ab7ce6..b7a984c2a7d5 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -406,6 +406,26 @@ unsigned int cpumask_random(const struct cpumask *src)
 #define for_each_cpu_and(cpu, mask1, mask2)				\
 	for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
 
+/**
+ * for_each_cpu_and_wrap - iterate over every cpu in both masks, starting at a
+ * specified location.
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask1: the first cpumask pointer
+ * @mask2: the second cpumask pointer
+ * @start: the start location
+ *
+ * This saves a temporary CPU mask in many places.  It is equivalent to:
+ *	struct cpumask tmp;
+ *	cpumask_and(&tmp, mask1, mask2);
+ *	for_each_cpu_wrap(cpu, &tmp, start)
+ *		...
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_and_wrap(cpu, mask1, mask2, start)				\
+	for_each_and_bit_wrap(cpu, cpumask_bits(mask1), cpumask_bits(mask2),	\
+			      small_cpumask_bits, start)
+
 /**
  * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding
  *			 those present in another.
diff --git a/include/linux/find.h b/include/linux/find.h
index 9d720ad92bc1..fff9e2d55e4d 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -487,6 +487,29 @@ unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size,
 	return bit < start ? bit : size;
 }
 
+/* Helper for for_each_and_bit_wrap(). */
+static __always_inline
+unsigned long __for_each_and_wrap(const unsigned long *bitmap1, const unsigned long *bitmap2,
+				  unsigned long size, unsigned long start, unsigned long n)
+{
+	unsigned long bit;
+
+	/* If not wrapped around */
+	if (n > start) {
+		/* and have a bit, just return it. */
+		bit = find_next_and_bit(bitmap1, bitmap2, size, n);
+		if (bit < size)
+			return bit;
+
+		/* Otherwise, wrap around and ... */
+		n = 0;
+	}
+
+	/* Search the other part. */
+	bit = find_next_and_bit(bitmap1, bitmap2, start, n);
+	return bit < start ? bit : size;
+}
+
 /**
  * find_next_clump8 - find next 8-bit clump with set bits in a memory region
  * @clump: location to store copy of found clump
@@ -682,6 +705,20 @@ unsigned long find_next_bit_le(const void *addr, unsigned
 	     (bit) < (size);							\
 	     (bit) = __for_each_wrap((addr), (size), (start), (bit) + 1))
 
+/**
+ * for_each_and_bit_wrap - iterate over all set bits in (*addr1 & *addr2)
+ * starting from @start, and wrapping around the end of bitmap.
+ * @bit: offset for current iteration
+ * @addr1: address of first bitmap
+ * @addr2: address of second bitmap to AND with the first
+ * @size: bitmap size in number of bits
+ * @start: Starting bit for bitmap traversing, wrapping around the bitmap end
+ */
+#define for_each_and_bit_wrap(bit, addr1, addr2, size, start) \
+	for ((bit) = find_next_and_bit_wrap((addr1), (addr2), (size), (start));		\
+	     (bit) < (size);								\
+	     (bit) = __for_each_and_wrap((addr1), (addr2), (size), (start), (bit) + 1))
+
 /**
  * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
  * @start: bit offset to start search and to store the current iteration offset
-- 
2.43.0
Re: [RESEND RFC PATCH v2 06/29] cpumask: Introduce for_each_cpu_and_wrap() and bitfield helpers
Posted by Yury Norov 6 days, 14 hours ago
On Mon, Dec 08, 2025 at 09:26:52AM +0000, K Prateek Nayak wrote:
> There is a developing pattern of using cpumask_and() followed by
> for_each_cpu_wrap() in the scheduler.
> 
> To avoid a temporary variable and needlessly iterating over the cpumask
> twice - once for the cpumask_and() operation and the second in the
> for_each_cpu_wrap() loop - introduce a new for_each_cpu_and_wrap()
> helper to iterate over the common bits set on two bitfields starting at
> a specific position without needing an intermediate cpumask variable.
> 
> Cc: Yury Norov <yury.norov@gmail.com>
> Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
> Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
> ---
>  include/linux/cpumask.h | 20 ++++++++++++++++++++
>  include/linux/find.h    | 37 +++++++++++++++++++++++++++++++++++++
>  2 files changed, 57 insertions(+)
> 
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index ff8f41ab7ce6..b7a984c2a7d5 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -406,6 +406,26 @@ unsigned int cpumask_random(const struct cpumask *src)
>  #define for_each_cpu_and(cpu, mask1, mask2)				\
>  	for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
>  
> +/**
> + * for_each_cpu_wrap - iterate over every cpu in both masks, starting at a

for_each_cpu_and_wrap

With that,

Acked-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>

> + * specified location.
> + * @cpu: the (optionally unsigned) integer iterator
> + * @mask1: the first cpumask pointer
> + * @mask2: the second cpumask pointer
> + * @start: the start location
> + *
> + * This saves a temporary CPU mask in many places.  It is equivalent to:
> + *	struct cpumask tmp;
> + *	cpumask_and(&tmp, &mask1, &mask2);
> + *	for_each_cpu_wrap(cpu, &tmp, start)
> + *		...
> + *
> + * After the loop, cpu is >= nr_cpu_ids.
> + */
> +#define for_each_cpu_and_wrap(cpu, mask1, mask2, start)				\
> +	for_each_and_bit_wrap(cpu, cpumask_bits(mask1), cpumask_bits(mask2),	\
> +			      small_cpumask_bits, start)
> +
>  /**
>   * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding
>   *			 those present in another.
> diff --git a/include/linux/find.h b/include/linux/find.h
> index 9d720ad92bc1..fff9e2d55e4d 100644
> --- a/include/linux/find.h
> +++ b/include/linux/find.h
> @@ -487,6 +487,29 @@ unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size,
>  	return bit < start ? bit : size;
>  }
>  
> +/* Helper for for_each_and_bit_wrap(). */
> +static __always_inline
> +unsigned long __for_each_and_wrap(const unsigned long *bitmap1, const unsigned long *bitmap2,
> +				  unsigned long size, unsigned long start, unsigned long n)
> +{
> +	unsigned long bit;
> +
> +	/* If not wrapped around */
> +	if (n > start) {
> +		/* and have a bit, just return it. */
> +		bit = find_next_and_bit(bitmap1, bitmap2, size, n);
> +		if (bit < size)
> +			return bit;
> +
> +		/* Otherwise, wrap around and ... */
> +		n = 0;
> +	}
> +
> +	/* Search the other part. */
> +	bit = find_next_and_bit(bitmap1, bitmap2, start, n);
> +	return bit < start ? bit : size;
> +}
> +
>  /**
>   * find_next_clump8 - find next 8-bit clump with set bits in a memory region
>   * @clump: location to store copy of found clump
> @@ -682,6 +705,20 @@ unsigned long find_next_bit_le(const void *addr, unsigned
>  	     (bit) < (size);							\
>  	     (bit) = __for_each_wrap((addr), (size), (start), (bit) + 1))
>  
> +/**
> + * for_each_and_bit_wrap - iterate over all set bits in (*addr1 & *addr2)
> + * starting from @start, and wrapping around the end of bitmap.
> + * @bit: offset for current iteration
> + * @addr1: address of first bitmap
> + * @addr2: address of second bitmask to and with the first
> + * @size: bitmap size in number of bits
> + * @start: Starting bit for bitmap traversing, wrapping around the bitmap end
> + */
> +#define for_each_and_bit_wrap(bit, addr1, addr2, size, start) \
> +	for ((bit) = find_next_and_bit_wrap((addr1), (addr2), (size), (start));		\
> +	     (bit) < (size);								\
> +	     (bit) = __for_each_and_wrap((addr1), (addr2), (size), (start), (bit) + 1))
> +
>  /**
>   * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits
>   * @start: bit offset to start search and to store the current iteration offset
> -- 
> 2.43.0