[PATCH] bpf: Simplify code by using for_each_cpu_wrap()

Punit Agrawal posted 1 patch 3 years, 7 months ago
There is a newer version of this series
kernel/bpf/percpu_freelist.c | 42 ++++++++++--------------------------
1 file changed, 11 insertions(+), 31 deletions(-)
[PATCH] bpf: Simplify code by using for_each_cpu_wrap()
Posted by Punit Agrawal 3 years, 7 months ago
In the percpu freelist code, it is a common pattern to iterate over
the possible CPUs mask starting with the current CPU. The pattern is
implemented using a hand-rolled while loop with the loop variable
increment being open-coded.

Simplify the code by replacing the while() loops with the
for_each_cpu_wrap() helper to iterate over the possible CPUs starting
with the current CPU. As a result, some of the special-casing in the
loop also gets simplified.

No functional change intended.

Signed-off-by: Punit Agrawal <punit.agrawal@bytedance.com>
---
Hi,

I noticed an opportunity for simplifying the code while reviewing a
backport for one of the commits in this area.

Please consider merging.

Thanks,
Punit

 kernel/bpf/percpu_freelist.c | 42 ++++++++++--------------------------
 1 file changed, 11 insertions(+), 31 deletions(-)

diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 00b874c8e889..9dd9201c6f07 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -56,10 +56,9 @@ static inline bool pcpu_freelist_try_push_extra(struct pcpu_freelist *s,
 static inline void ___pcpu_freelist_push_nmi(struct pcpu_freelist *s,
 					     struct pcpu_freelist_node *node)
 {
-	int cpu, orig_cpu;
+	int cpu;
 
-	orig_cpu = cpu = raw_smp_processor_id();
-	while (1) {
+	for_each_cpu_wrap(cpu, cpu_possible_mask, raw_smp_processor_id()) {
 		struct pcpu_freelist_head *head;
 
 		head = per_cpu_ptr(s->freelist, cpu);
@@ -68,15 +67,10 @@ static inline void ___pcpu_freelist_push_nmi(struct pcpu_freelist *s,
 			raw_spin_unlock(&head->lock);
 			return;
 		}
-		cpu = cpumask_next(cpu, cpu_possible_mask);
-		if (cpu >= nr_cpu_ids)
-			cpu = 0;
-
-		/* cannot lock any per cpu lock, try extralist */
-		if (cpu == orig_cpu &&
-		    pcpu_freelist_try_push_extra(s, node))
-			return;
 	}
+
+	/* cannot lock any per cpu lock, try extralist */
+	pcpu_freelist_try_push_extra(s, node);
 }
 
 void __pcpu_freelist_push(struct pcpu_freelist *s,
@@ -125,13 +119,12 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
 {
 	struct pcpu_freelist_head *head;
 	struct pcpu_freelist_node *node;
-	int orig_cpu, cpu;
+	int cpu;
 
-	orig_cpu = cpu = raw_smp_processor_id();
-	while (1) {
+	for_each_cpu_wrap(cpu, cpu_possible_mask, raw_smp_processor_id()) {
 		head = per_cpu_ptr(s->freelist, cpu);
 		if (!READ_ONCE(head->first))
-			goto next_cpu;
+			continue;
 		raw_spin_lock(&head->lock);
 		node = head->first;
 		if (node) {
@@ -140,12 +133,6 @@ static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
 			return node;
 		}
 		raw_spin_unlock(&head->lock);
-next_cpu:
-		cpu = cpumask_next(cpu, cpu_possible_mask);
-		if (cpu >= nr_cpu_ids)
-			cpu = 0;
-		if (cpu == orig_cpu)
-			break;
 	}
 
 	/* per cpu lists are all empty, try extralist */
@@ -164,13 +151,12 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
 {
 	struct pcpu_freelist_head *head;
 	struct pcpu_freelist_node *node;
-	int orig_cpu, cpu;
+	int cpu;
 
-	orig_cpu = cpu = raw_smp_processor_id();
-	while (1) {
+	for_each_cpu_wrap(cpu, cpu_possible_mask, raw_smp_processor_id()) {
 		head = per_cpu_ptr(s->freelist, cpu);
 		if (!READ_ONCE(head->first))
-			goto next_cpu;
+			continue;
 		if (raw_spin_trylock(&head->lock)) {
 			node = head->first;
 			if (node) {
@@ -180,12 +166,6 @@ ___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
 			}
 			raw_spin_unlock(&head->lock);
 		}
-next_cpu:
-		cpu = cpumask_next(cpu, cpu_possible_mask);
-		if (cpu >= nr_cpu_ids)
-			cpu = 0;
-		if (cpu == orig_cpu)
-			break;
 	}
 
 	/* cannot pop from per cpu lists, try extralist */
-- 
2.35.1
Re: [PATCH] bpf: Simplify code by using for_each_cpu_wrap()
Posted by Alexei Starovoitov 3 years, 7 months ago
On Wed, Aug 17, 2022 at 6:08 AM Punit Agrawal
<punit.agrawal@bytedance.com> wrote:
>
> No functional change intended.

?

> -       orig_cpu = cpu = raw_smp_processor_id();
> -       while (1) {
> +       for_each_cpu_wrap(cpu, cpu_possible_mask, raw_smp_processor_id()) {
>                 struct pcpu_freelist_head *head;
>
>                 head = per_cpu_ptr(s->freelist, cpu);
> @@ -68,15 +67,10 @@ static inline void ___pcpu_freelist_push_nmi(struct pcpu_freelist *s,
>                         raw_spin_unlock(&head->lock);
>                         return;
>                 }
> -               cpu = cpumask_next(cpu, cpu_possible_mask);
> -               if (cpu >= nr_cpu_ids)
> -                       cpu = 0;
> -
> -               /* cannot lock any per cpu lock, try extralist */
> -               if (cpu == orig_cpu &&
> -                   pcpu_freelist_try_push_extra(s, node))
> -                       return;
>         }
> +
> +       /* cannot lock any per cpu lock, try extralist */
> +       pcpu_freelist_try_push_extra(s, node);

This is obviously not equivalent!
Re: Re: [PATCH] bpf: Simplify code by using for_each_cpu_wrap()
Posted by Punit Agrawal 3 years, 7 months ago
Hi Alexei,

Alexei Starovoitov <alexei.starovoitov@gmail.com> writes:

> On Wed, Aug 17, 2022 at 6:08 AM Punit Agrawal
> <punit.agrawal@bytedance.com> wrote:
>>
>> No functional change intended.
>
> ?
>
>> -       orig_cpu = cpu = raw_smp_processor_id();
>> -       while (1) {
>> +       for_each_cpu_wrap(cpu, cpu_possible_mask, raw_smp_processor_id()) {
>>                 struct pcpu_freelist_head *head;
>>
>>                 head = per_cpu_ptr(s->freelist, cpu);
>> @@ -68,15 +67,10 @@ static inline void ___pcpu_freelist_push_nmi(struct pcpu_freelist *s,
>>                         raw_spin_unlock(&head->lock);
>>                         return;
>>                 }
>> -               cpu = cpumask_next(cpu, cpu_possible_mask);
>> -               if (cpu >= nr_cpu_ids)
>> -                       cpu = 0;
>> -
>> -               /* cannot lock any per cpu lock, try extralist */
>> -               if (cpu == orig_cpu &&
>> -                   pcpu_freelist_try_push_extra(s, node))
>> -                       return;
>>         }
>> +
>> +       /* cannot lock any per cpu lock, try extralist */
>> +       pcpu_freelist_try_push_extra(s, node);
>
> This is obviously not equivalent!

Thanks for taking a look. You're right - I missed the fact that it's an
infinite loop until the node gets pushed to one of the lists.

I'll send an update with that fixed up.