The inner loop may be replaced with the dedicated for_each_online_cpu_wrap().
This helps to avoid setting the same bits in the @mask more than once in
case group_size is greater than the number of online CPUs.
CC: Nick Child <nnac123@linux.ibm.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
drivers/net/virtio_net.c | 12 +++++++-----
include/linux/cpumask.h | 4 ++++
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7646ddd9bef7..9d7c37e968b5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3826,7 +3826,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
cpumask_var_t mask;
int stragglers;
int group_size;
- int i, j, cpu;
+ int i, start = 0, cpu;
int num_cpu;
int stride;
@@ -3840,16 +3840,18 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
stragglers = num_cpu >= vi->curr_queue_pairs ?
num_cpu % vi->curr_queue_pairs :
0;
- cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < vi->curr_queue_pairs; i++) {
group_size = stride + (i < stragglers ? 1 : 0);
- for (j = 0; j < group_size; j++) {
+ for_each_online_cpu_wrap(cpu, start) {
+ if (!group_size--) {
+ start = cpu;
+ break;
+ }
cpumask_set_cpu(cpu, mask);
- cpu = cpumask_next_wrap(cpu, cpu_online_mask,
- nr_cpu_ids, false);
}
+
virtqueue_set_affinity(vi->rq[i].vq, mask);
virtqueue_set_affinity(vi->sq[i].vq, mask);
__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 5cf69a110c1c..30042351f15f 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1036,6 +1036,8 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
#define for_each_possible_cpu_wrap(cpu, start) \
for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++)
+#define for_each_online_cpu_wrap(cpu, start) \
+ for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++)
#else
#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
#define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask)
@@ -1044,6 +1046,8 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
#define for_each_possible_cpu_wrap(cpu, start) \
for_each_cpu_wrap((cpu), cpu_possible_mask, (start))
+#define for_each_online_cpu_wrap(cpu, start) \
+ for_each_cpu_wrap((cpu), cpu_online_mask, (start))
#endif
/* Wrappers for arch boot code to manipulate normally-constant masks */
--
2.43.0
On Tue, Jan 28, 2025 at 11:46:31AM -0500, Yury Norov wrote:
> The inner loop may be replaced with the dedicated for_each_online_cpu_wrap.
> It helps to avoid setting the same bits in the @mask more than once, in
> case of group_size is greater than number of online CPUs.
nit: Looking at the previous logic of how group_stride is calculated, I don't
think there is any possibility of "setting the same bits in the @mask more
than once", since group_stride = n_cpu / n_queues.
nit: I see this more as 2 patches. The introduction of a new core
helper function is a bit buried.
>
> CC: Nick Child <nnac123@linux.ibm.com>
> Signed-off-by: Yury Norov <yury.norov@gmail.com>
I don't know if my comments alone merit a v3, and I think the patch
does simplify the codebase, so:
Reviewed-by: Nick Child <nnac123@linux.ibm.com>
> ---
> drivers/net/virtio_net.c | 12 +++++++-----
> include/linux/cpumask.h | 4 ++++
> 2 files changed, 11 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7646ddd9bef7..9d7c37e968b5 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -3826,7 +3826,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
> cpumask_var_t mask;
> int stragglers;
> int group_size;
> - int i, j, cpu;
> + int i, start = 0, cpu;
> int num_cpu;
> int stride;
>
> @@ -3840,16 +3840,18 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
> stragglers = num_cpu >= vi->curr_queue_pairs ?
> num_cpu % vi->curr_queue_pairs :
> 0;
> - cpu = cpumask_first(cpu_online_mask);
>
> for (i = 0; i < vi->curr_queue_pairs; i++) {
> group_size = stride + (i < stragglers ? 1 : 0);
>
> - for (j = 0; j < group_size; j++) {
> + for_each_online_cpu_wrap(cpu, start) {
> + if (!group_size--) {
> + start = cpu;
> + break;
> + }
> cpumask_set_cpu(cpu, mask);
> - cpu = cpumask_next_wrap(cpu, cpu_online_mask,
> - nr_cpu_ids, false);
> }
> +
> virtqueue_set_affinity(vi->rq[i].vq, mask);
> virtqueue_set_affinity(vi->sq[i].vq, mask);
> __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index 5cf69a110c1c..30042351f15f 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -1036,6 +1036,8 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
>
> #define for_each_possible_cpu_wrap(cpu, start) \
> for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++)
> +#define for_each_online_cpu_wrap(cpu, start) \
> + for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++)
> #else
> #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
> #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask)
> @@ -1044,6 +1046,8 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
>
> #define for_each_possible_cpu_wrap(cpu, start) \
> for_each_cpu_wrap((cpu), cpu_possible_mask, (start))
> +#define for_each_online_cpu_wrap(cpu, start) \
> + for_each_cpu_wrap((cpu), cpu_online_mask, (start))
> #endif
>
> /* Wrappers for arch boot code to manipulate normally-constant masks */
> --
> 2.43.0
>
Thanks for the review and testing, Nick!
On Wed, Feb 05, 2025 at 05:00:23PM -0600, Nick Child wrote:
> On Tue, Jan 28, 2025 at 11:46:31AM -0500, Yury Norov wrote:
> > The inner loop may be replaced with the dedicated for_each_online_cpu_wrap.
> > It helps to avoid setting the same bits in the @mask more than once, in
> > case of group_size is greater than number of online CPUs.
>
> nit: Looking at the previous logic of how group_stride is calculated, I don't
> think there is possibility of "setting the same bits in the @mask more
> than once". group_stride = n_cpu / n_queues
>
> nit: I see this more as 2 patches. The introduction of a new core
> helper function is a bit buried.
>
> >
> > CC: Nick Child <nnac123@linux.ibm.com>
> > Signed-off-by: Yury Norov <yury.norov@gmail.com>
>
> Don't know if my comments alone merit a v3 and I think the patch
> does simplify the codebase so:
> Reviewed-by: Nick Child <nnac123@linux.ibm.com>
I fixed the comments in #2 and #3 as you suggested, and split out the new
for_each() loops into a separate patch.
I also think those are trivial changes not worth a v3, so it's in
bitmap-for-next:
https://github.com/norov/linux/tree/bitmap-for-next
Thanks for the review, Nick!
Thanks,
Yury
> > ---
> > drivers/net/virtio_net.c | 12 +++++++-----
> > include/linux/cpumask.h | 4 ++++
> > 2 files changed, 11 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 7646ddd9bef7..9d7c37e968b5 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -3826,7 +3826,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
> > cpumask_var_t mask;
> > int stragglers;
> > int group_size;
> > - int i, j, cpu;
> > + int i, start = 0, cpu;
> > int num_cpu;
> > int stride;
> >
> > @@ -3840,16 +3840,18 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
> > stragglers = num_cpu >= vi->curr_queue_pairs ?
> > num_cpu % vi->curr_queue_pairs :
> > 0;
> > - cpu = cpumask_first(cpu_online_mask);
> >
> > for (i = 0; i < vi->curr_queue_pairs; i++) {
> > group_size = stride + (i < stragglers ? 1 : 0);
> >
> > - for (j = 0; j < group_size; j++) {
> > + for_each_online_cpu_wrap(cpu, start) {
> > + if (!group_size--) {
> > + start = cpu;
> > + break;
> > + }
> > cpumask_set_cpu(cpu, mask);
> > - cpu = cpumask_next_wrap(cpu, cpu_online_mask,
> > - nr_cpu_ids, false);
> > }
> > +
> > virtqueue_set_affinity(vi->rq[i].vq, mask);
> > virtqueue_set_affinity(vi->sq[i].vq, mask);
> > __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
> > diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> > index 5cf69a110c1c..30042351f15f 100644
> > --- a/include/linux/cpumask.h
> > +++ b/include/linux/cpumask.h
> > @@ -1036,6 +1036,8 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
> >
> > #define for_each_possible_cpu_wrap(cpu, start) \
> > for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++)
> > +#define for_each_online_cpu_wrap(cpu, start) \
> > + for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++)
> > #else
> > #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
> > #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask)
> > @@ -1044,6 +1046,8 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
> >
> > #define for_each_possible_cpu_wrap(cpu, start) \
> > for_each_cpu_wrap((cpu), cpu_possible_mask, (start))
> > +#define for_each_online_cpu_wrap(cpu, start) \
> > + for_each_cpu_wrap((cpu), cpu_online_mask, (start))
> > #endif
> >
> > /* Wrappers for arch boot code to manipulate normally-constant masks */
> > --
> > 2.43.0
> >
© 2016 - 2026 Red Hat, Inc.