Hello everyone,

There was some interest at OSPM'25 to explore using the push task
mechanism for idle and newidle balance. This series implements one such
idea. The main reason for the RFC is to understand whether this is the
implementation people were in favor of before trying to optimize it for
all the workloads from my test setup.

Note: The current performance of the prototype is rough. I haven't
optimized it yet since I would love some feedback first on the approach.


Current approach
================

The push task framework for the fair class has been cherry-picked from
Vincent's series [1] and has been implemented for the !EAS case.

This series implements the idea from Valentin [2] where, in the presence
of pushable tasks, the CPU will set itself in a per-LLC "overloaded_mask".

The inter-NUMA newidle balance has been modified to traverse the CPUs
set in the overloaded mask, first in the local LLC, and then the CPUs
set in the overloaded masks of the other LLCs in the same NUMA node,
with the goal of pulling a single task towards itself rather than
performing a full-fledged load balance.

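To make the intended flow concrete, here is a minimal sketch of the pull
side, assuming the per-LLC "overloaded_mask" introduced by this series.
pull_one_pushable_task() is a made-up placeholder for the detach/attach
step; locking, the walk over the other LLCs of the node, and error
handling are all omitted:

/*
 * Sketch only: scan the overloaded_mask of the local LLC and stop as
 * soon as a single task has been pulled to the newly idle CPU.
 */
static int newidle_pull_one_task_sketch(struct rq *this_rq)
{
	struct sched_domain_shared *sd_share =
		rcu_dereference(per_cpu(sd_llc_shared, cpu_of(this_rq)));
	int cpu;

	if (!sd_share || !sd_share->overloaded_mask)
		return 0;

	for_each_cpu(cpu, sd_share->overloaded_mask) {
		/* placeholder for the detach_one_task()/attach_one_task() pair */
		if (pull_one_pushable_task(cpu_rq(cpu), this_rq))
			return 1;
	}

	return 0;
}
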
This implements some of the ideas from David Vernet's SHARED_RUNQ
prototype [3] except that, instead of a single SHARED_RUNQ per-LLC /
per-shard, the overloaded mask serves as an indicator of the per-CPU
rq(s) containing pushable tasks that can be migrated to the CPU going
idle. This avoids having a per-SHARED_RUNQ lock at the expense of
maintaining the overloaded cpumask.

The push callback itself has been modified to try to push the tasks on
the pushable task list to one of the CPUs in "nohz.idle_cpus_mask",
taking the load off of idle balancing.

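A condensed sketch of that push path follows (illustrative only; the
actual patch later in this series drops and re-takes the local rq lock
around the migration, which is omitted here):

/*
 * Sketch only: move one pushable task to a CPU currently in the
 * nohz idle set, if its affinity allows it.
 */
static bool push_one_task_to_idle_cpu_sketch(struct rq *rq)
{
	struct task_struct *p = pick_next_pushable_fair_task(rq);
	int cpu;

	if (!p)
		return false;

	for_each_cpu_and(cpu, nohz.idle_cpus_mask, p->cpus_ptr) {
		if (!idle_cpu(cpu))
			continue;

		deactivate_task(rq, p, 0);
		set_task_cpu(p, cpu);
		attach_one_task(cpu_rq(cpu), p);
		return true;
	}

	return false;
}
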
Clarification required
======================

I believe using the per-CPU pushable task list as a proxy for a single
SHARED_RUNQ was the idea Peter was implying during the discussion. Is
this correct, or did I completely misunderstand it? P.S. SHARED_RUNQ
could also be modelled as a large per-LLC push list.

An alternate implementation is to allow CPUs to go idle as quickly as
possible and then rely completely on the push mechanism and the
"idle_cpus_mask" to push tasks to an idle CPU; however, this puts the
burden of moving tasks on a busy, overloaded CPU, which may not be
ideal.

Since folks mentioned using the "push mechanism" for newidle balance,
was the above idea the one they had in mind?

There seems to be some clear advantage from doing a complete balance in
the newidle path. Since the schedstats are not rigged up yet for the new
approach, I'm not completely sure where the advantages vs. disadvantages
currently are.

If the current approach is right, I'll dig deeper to try to address all
the shortcomings of this prototype.

Systems with a unified LLC will likely run into bottlenecks maintaining
a large per-LLC mask that can see multiple concurrent updates. I have
plans to implement an "sd_shard" which shards the large LLC, making the
cpumask maintenance less heavy on these systems.


References
==========

[1] https://lore.kernel.org/lkml/20250302210539.1563190-6-vincent.guittot@linaro.org/
[2] https://lore.kernel.org/lkml/xhsmh1putoxbz.mognet@vschneid-thinkpadt14sgen2i.remote.csb/
[3] https://lore.kernel.org/lkml/20231212003141.216236-1-void@manifault.com/

--
K Prateek Nayak (4):
  sched/fair: Introduce overloaded_mask in sched_domain_shared
  sched/fair: Update overloaded mask in presence of pushable task
  sched/fair: Rework inter-NUMA newidle balancing
  sched/fair: Proactive idle balance using push mechanism

Vincent Guittot (1):
  sched/fair: Add push task framework

 include/linux/sched/topology.h |   1 +
 kernel/sched/fair.c            | 297 +++++++++++++++++++++++++++++++--
 kernel/sched/sched.h           |   2 +
 kernel/sched/topology.c        |  25 ++-
 4 files changed, 306 insertions(+), 19 deletions(-)


base-commit: 6432e163ba1b7d80b5876792ce53e511f041ab91
--
2.34.1


(Interleaved series) Cover letter for the ADMV1013 upconverter patches:

The ADMV1013 is a wideband, microwave upconverter optimized for point
to point microwave radio designs operating in the 24 GHz to 44 GHz
radio frequency (RF) range.

Datasheet:
https://www.analog.com/media/en/technical-documentation/data-sheets/ADMV1013.pdf

NOTE:
Currently depends on 64-bit architecture since the input clock that
serves as Local Oscillator should support values in the range 5.4 GHz
to 10.25 GHz.

We might need some scaling implementation in the clock framework so
that u64 types are supported when using 32-bit architectures.

Antoniu Miclaus (2):
  iio: frequency: admv1013: add support for ADMV1013
  dt-bindings: iio: frequency: add admv1013 doc

 .../bindings/iio/frequency/adi,admv1013.yaml | 110 ++++
 drivers/iio/frequency/Kconfig                |  13 +
 drivers/iio/frequency/Makefile               |   1 +
 drivers/iio/frequency/admv1013.c             | 579 ++++++++++++++++++
 4 files changed, 703 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml
 create mode 100644 drivers/iio/frequency/admv1013.c

--
2.33.1
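
To make the 64-bit dependency concrete, a purely illustrative snippet
(not part of either series): clk_get_rate() returns an unsigned long,
so on a 32-bit architecture the LO rates this part needs simply do not
fit.

#include <linux/limits.h>
#include <linux/types.h>

/*
 * Illustration only: on a 32-bit architecture unsigned long tops out
 * at ~4.29 GHz, so an LO frequency in the 5.4 GHz to 10.25 GHz range
 * cannot be represented by clk_get_rate()'s return type.
 */
static inline bool lo_rate_fits_clk_rate(u64 lo_hz)
{
	return lo_hz <= ULONG_MAX;
}
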
sched/fair: Add push task framework

1 | From: Vincent Guittot <vincent.guittot@linaro.org> | ||
2 | 1 | ||
3 | Add the skeleton for push task infrastructure. The empty | ||
4 | push_fair_task() prototype will be used to implement proactive idle | ||
5 | balancing in subsequent commits. | ||
6 | |||
7 | [ prateek: Broke off relevant bits from [1] ] | ||
8 | |||
9 | Link: https://lore.kernel.org/all/20250302210539.1563190-6-vincent.guittot@linaro.org/ [1] | ||
10 | Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> | ||
11 | Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com> | ||
12 | --- | ||
13 | kernel/sched/fair.c | 85 ++++++++++++++++++++++++++++++++++++++++++++ | ||
14 | kernel/sched/sched.h | 2 ++ | ||
15 | 2 files changed, 87 insertions(+) | ||
16 | |||
17 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/kernel/sched/fair.c | ||
20 | +++ b/kernel/sched/fair.c | ||
21 | @@ -XXX,XX +XXX,XX @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) | ||
22 | hrtick_update(rq); | ||
23 | } | ||
24 | |||
25 | +static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p); | ||
26 | static void set_next_buddy(struct sched_entity *se); | ||
27 | |||
28 | /* | ||
29 | @@ -XXX,XX +XXX,XX @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) | ||
30 | h_nr_idle = task_has_idle_policy(p); | ||
31 | if (task_sleep || task_delayed || !se->sched_delayed) | ||
32 | h_nr_runnable = 1; | ||
33 | + | ||
34 | + fair_remove_pushable_task(rq, p); | ||
35 | } else { | ||
36 | cfs_rq = group_cfs_rq(se); | ||
37 | slice = cfs_rq_min_slice(cfs_rq); | ||
38 | @@ -XXX,XX +XXX,XX @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) | ||
39 | return target; | ||
40 | } | ||
41 | |||
42 | +static inline bool fair_push_task(struct task_struct *p) | ||
43 | +{ | ||
44 | + if (!task_on_rq_queued(p)) | ||
45 | + return false; | ||
46 | + | ||
47 | + if (p->se.sched_delayed) | ||
48 | + return false; | ||
49 | + | ||
50 | + if (p->nr_cpus_allowed == 1) | ||
51 | + return false; | ||
52 | + | ||
53 | + return true; | ||
54 | +} | ||
55 | + | ||
56 | +static inline int has_pushable_tasks(struct rq *rq) | ||
57 | +{ | ||
58 | + return !plist_head_empty(&rq->cfs.pushable_tasks); | ||
59 | +} | ||
60 | + | ||
61 | +/* | ||
62 | + * See if the non running fair tasks on this rq can be sent on other CPUs | ||
63 | + * that fits better with their profile. | ||
64 | + */ | ||
65 | +static bool push_fair_task(struct rq *rq) | ||
66 | +{ | ||
67 | + return false; | ||
68 | +} | ||
69 | + | ||
70 | +static void push_fair_tasks(struct rq *rq) | ||
71 | +{ | ||
72 | + /* push_fair_task() will return true if it moved a fair task */ | ||
73 | + while (push_fair_task(rq)) | ||
74 | + ; | ||
75 | +} | ||
76 | + | ||
77 | +static DEFINE_PER_CPU(struct balance_callback, fair_push_head); | ||
78 | + | ||
79 | +static inline void fair_queue_pushable_tasks(struct rq *rq) | ||
80 | +{ | ||
81 | + if (!has_pushable_tasks(rq)) | ||
82 | + return; | ||
83 | + | ||
84 | + queue_balance_callback(rq, &per_cpu(fair_push_head, rq->cpu), push_fair_tasks); | ||
85 | +} | ||
86 | +static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p) | ||
87 | +{ | ||
88 | + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks); | ||
89 | +} | ||
90 | + | ||
91 | +static void fair_add_pushable_task(struct rq *rq, struct task_struct *p) | ||
92 | +{ | ||
93 | + if (fair_push_task(p)) { | ||
94 | + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks); | ||
95 | + plist_node_init(&p->pushable_tasks, p->prio); | ||
96 | + plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks); | ||
97 | + } | ||
98 | +} | ||
99 | + | ||
100 | /* | ||
101 | * select_task_rq_fair: Select target runqueue for the waking task in domains | ||
102 | * that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE, | ||
103 | @@ -XXX,XX +XXX,XX @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) | ||
104 | return sched_balance_newidle(rq, rf) != 0; | ||
105 | } | ||
106 | #else | ||
107 | +static inline void fair_queue_pushable_tasks(struct rq *rq) {} | ||
108 | +static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p) {} | ||
109 | +static inline void fair_add_pushable_task(struct rq *rq, struct task_struct *p) {} | ||
110 | static inline void set_task_max_allowed_capacity(struct task_struct *p) {} | ||
111 | #endif /* CONFIG_SMP */ | ||
112 | |||
113 | @@ -XXX,XX +XXX,XX @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf | ||
114 | put_prev_entity(cfs_rq, pse); | ||
115 | set_next_entity(cfs_rq, se); | ||
116 | |||
117 | + /* | ||
118 | + * The previous task might be eligible for being pushed on | ||
119 | + * another cpu if it is still active. | ||
120 | + */ | ||
121 | + fair_add_pushable_task(rq, prev); | ||
122 | + | ||
123 | __set_next_task_fair(rq, p, true); | ||
124 | } | ||
125 | |||
126 | @@ -XXX,XX +XXX,XX @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct t | ||
127 | cfs_rq = cfs_rq_of(se); | ||
128 | put_prev_entity(cfs_rq, se); | ||
129 | } | ||
130 | + | ||
131 | + /* | ||
132 | + * The previous task might be eligible for being pushed on another cpu | ||
133 | + * if it is still active. | ||
134 | + */ | ||
135 | + fair_add_pushable_task(rq, prev); | ||
136 | + | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | @@ -XXX,XX +XXX,XX @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs | ||
141 | { | ||
142 | struct sched_entity *se = &p->se; | ||
143 | |||
144 | + fair_remove_pushable_task(rq, p); | ||
145 | + | ||
146 | #ifdef CONFIG_SMP | ||
147 | if (task_on_rq_queued(p)) { | ||
148 | /* | ||
149 | @@ -XXX,XX +XXX,XX @@ static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool firs | ||
150 | if (hrtick_enabled_fair(rq)) | ||
151 | hrtick_start_fair(rq, p); | ||
152 | |||
153 | + /* | ||
154 | + * Try to push prev task before checking misfit for next task as | ||
155 | + * the migration of prev can make next fitting the CPU | ||
156 | + */ | ||
157 | + fair_queue_pushable_tasks(rq); | ||
158 | update_misfit_status(p, rq); | ||
159 | sched_fair_update_stop_tick(rq, p); | ||
160 | } | ||
161 | @@ -XXX,XX +XXX,XX @@ void init_cfs_rq(struct cfs_rq *cfs_rq) | ||
162 | cfs_rq->tasks_timeline = RB_ROOT_CACHED; | ||
163 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | ||
164 | #ifdef CONFIG_SMP | ||
165 | + plist_head_init(&cfs_rq->pushable_tasks); | ||
166 | raw_spin_lock_init(&cfs_rq->removed.lock); | ||
167 | #endif | ||
168 | } | ||
169 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h | ||
170 | index XXXXXXX..XXXXXXX 100644 | ||
171 | --- a/kernel/sched/sched.h | ||
172 | +++ b/kernel/sched/sched.h | ||
173 | @@ -XXX,XX +XXX,XX @@ struct cfs_rq { | ||
174 | struct list_head leaf_cfs_rq_list; | ||
175 | struct task_group *tg; /* group that "owns" this runqueue */ | ||
176 | |||
177 | + struct plist_head pushable_tasks; | ||
178 | + | ||
179 | /* Locally cached copy of our task_group's idle value */ | ||
180 | int idle; | ||
181 | |||
182 | -- | ||
2.34.1
sched/fair: Introduce overloaded_mask in sched_domain_shared

Introduce a new cpumask member "overloaded_mask" in sched_domain_shared.
This mask will be used to keep track of overloaded CPUs with pushable
tasks on them, and will later be used by newidle balance to scan only
the overloaded CPUs when pulling a task.

6 | Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com> | ||
7 | --- | ||
8 | include/linux/sched/topology.h | 1 + | ||
9 | kernel/sched/topology.c | 25 ++++++++++++++++++------- | ||
10 | 2 files changed, 19 insertions(+), 7 deletions(-) | ||
11 | |||
12 | diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/include/linux/sched/topology.h | ||
15 | +++ b/include/linux/sched/topology.h | ||
16 | @@ -XXX,XX +XXX,XX @@ struct sched_domain_shared { | ||
17 | atomic_t nr_busy_cpus; | ||
18 | int has_idle_cores; | ||
19 | int nr_idle_scan; | ||
20 | + cpumask_var_t overloaded_mask; | ||
21 | }; | ||
22 | |||
23 | struct sched_domain { | ||
24 | diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c | ||
25 | index XXXXXXX..XXXXXXX 100644 | ||
26 | --- a/kernel/sched/topology.c | ||
27 | +++ b/kernel/sched/topology.c | ||
28 | @@ -XXX,XX +XXX,XX @@ static void destroy_sched_domain(struct sched_domain *sd) | ||
29 | */ | ||
30 | free_sched_groups(sd->groups, 1); | ||
31 | |||
32 | - if (sd->shared && atomic_dec_and_test(&sd->shared->ref)) | ||
33 | + if (sd->shared && atomic_dec_and_test(&sd->shared->ref)) { | ||
34 | + free_cpumask_var(sd->shared->overloaded_mask); | ||
35 | kfree(sd->shared); | ||
36 | + } | ||
37 | kfree(sd); | ||
38 | } | ||
39 | |||
40 | @@ -XXX,XX +XXX,XX @@ static int __sdt_alloc(const struct cpumask *cpu_map) | ||
41 | return -ENOMEM; | ||
42 | |||
43 | for_each_cpu(j, cpu_map) { | ||
44 | + int node = cpu_to_node(j); | ||
45 | struct sched_domain *sd; | ||
46 | struct sched_domain_shared *sds; | ||
47 | struct sched_group *sg; | ||
48 | struct sched_group_capacity *sgc; | ||
49 | |||
50 | sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), | ||
51 | - GFP_KERNEL, cpu_to_node(j)); | ||
52 | + GFP_KERNEL, node); | ||
53 | if (!sd) | ||
54 | return -ENOMEM; | ||
55 | |||
56 | *per_cpu_ptr(sdd->sd, j) = sd; | ||
57 | |||
58 | sds = kzalloc_node(sizeof(struct sched_domain_shared), | ||
59 | - GFP_KERNEL, cpu_to_node(j)); | ||
60 | + GFP_KERNEL, node); | ||
61 | if (!sds) | ||
62 | return -ENOMEM; | ||
63 | |||
64 | + if (!zalloc_cpumask_var_node(&sds->overloaded_mask, GFP_KERNEL, node)) | ||
65 | + return -ENOMEM; | ||
66 | + | ||
67 | *per_cpu_ptr(sdd->sds, j) = sds; | ||
68 | |||
69 | sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), | ||
70 | - GFP_KERNEL, cpu_to_node(j)); | ||
71 | + GFP_KERNEL, node); | ||
72 | if (!sg) | ||
73 | return -ENOMEM; | ||
74 | |||
75 | @@ -XXX,XX +XXX,XX @@ static int __sdt_alloc(const struct cpumask *cpu_map) | ||
76 | *per_cpu_ptr(sdd->sg, j) = sg; | ||
77 | |||
78 | sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(), | ||
79 | - GFP_KERNEL, cpu_to_node(j)); | ||
80 | + GFP_KERNEL, node); | ||
81 | if (!sgc) | ||
82 | return -ENOMEM; | ||
83 | |||
84 | @@ -XXX,XX +XXX,XX @@ static void __sdt_free(const struct cpumask *cpu_map) | ||
85 | kfree(*per_cpu_ptr(sdd->sd, j)); | ||
86 | } | ||
87 | |||
88 | - if (sdd->sds) | ||
89 | - kfree(*per_cpu_ptr(sdd->sds, j)); | ||
90 | + if (sdd->sds) { | ||
91 | + struct sched_domain_shared *sds = *per_cpu_ptr(sdd->sds, j); | ||
92 | + | ||
93 | + if (sds) | ||
94 | + free_cpumask_var(sds->overloaded_mask); | ||
95 | + kfree(sds); | ||
96 | + } | ||
97 | if (sdd->sg) | ||
98 | kfree(*per_cpu_ptr(sdd->sg, j)); | ||
99 | if (sdd->sgc) | ||
100 | -- | ||
2.34.1
sched/fair: Update overloaded mask in presence of pushable task

In the presence of pushable tasks on a CPU, set that CPU in the newly
introduced "overloaded_mask" in the sched_domain_shared struct. This
will be used by newidle balance to limit the scanning to these
overloaded CPUs since they contain tasks that could be run on the newly
idle target.

6 | Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com> | ||
7 | --- | ||
8 | kernel/sched/fair.c | 24 ++++++++++++++++++++++++ | ||
9 | 1 file changed, 24 insertions(+) | ||
10 | |||
11 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/kernel/sched/fair.c | ||
14 | +++ b/kernel/sched/fair.c | ||
15 | @@ -XXX,XX +XXX,XX @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) | ||
16 | return target; | ||
17 | } | ||
18 | |||
19 | +static inline void update_overloaded_mask(int cpu, bool contains_pushable) | ||
20 | +{ | ||
21 | + struct sched_domain_shared *sd_share = rcu_dereference(per_cpu(sd_llc_shared, cpu)); | ||
22 | + cpumask_var_t overloaded_mask; | ||
23 | + | ||
24 | + if (!sd_share) | ||
25 | + return; | ||
26 | + | ||
27 | + overloaded_mask = sd_share->overloaded_mask; | ||
28 | + if (!overloaded_mask) | ||
29 | + return; | ||
30 | + | ||
31 | + if (contains_pushable) | ||
32 | + cpumask_set_cpu(cpu, overloaded_mask); | ||
33 | + else | ||
34 | + cpumask_clear_cpu(cpu, overloaded_mask); | ||
35 | +} | ||
36 | + | ||
37 | static inline bool fair_push_task(struct task_struct *p) | ||
38 | { | ||
39 | if (!task_on_rq_queued(p)) | ||
40 | @@ -XXX,XX +XXX,XX @@ static inline void fair_queue_pushable_tasks(struct rq *rq) | ||
41 | static void fair_remove_pushable_task(struct rq *rq, struct task_struct *p) | ||
42 | { | ||
43 | plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks); | ||
44 | + | ||
45 | + if (!has_pushable_tasks(rq)) | ||
46 | + update_overloaded_mask(rq->cpu, false); | ||
47 | } | ||
48 | |||
49 | static void fair_add_pushable_task(struct rq *rq, struct task_struct *p) | ||
50 | { | ||
51 | if (fair_push_task(p)) { | ||
52 | + if (!has_pushable_tasks(rq)) | ||
53 | + update_overloaded_mask(rq->cpu, true); | ||
54 | + | ||
55 | plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks); | ||
56 | plist_node_init(&p->pushable_tasks, p->prio); | ||
57 | plist_add(&p->pushable_tasks, &rq->cfs.pushable_tasks); | ||
58 | -- | ||
2.34.1
iio: frequency: admv1013: add support for ADMV1013

The ADMV1013 is a wideband, microwave upconverter optimized
for point to point microwave radio designs operating in the
24 GHz to 44 GHz radio frequency (RF) range.

Datasheet:
https://www.analog.com/media/en/technical-documentation/data-sheets/ADMV1013.pdf

Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
---
 drivers/iio/frequency/Kconfig    |  13 +
 drivers/iio/frequency/Makefile   |   1 +
 drivers/iio/frequency/admv1013.c | 579 +++++++++++++++++++++++++++++++
 3 files changed, 593 insertions(+)
 create mode 100644 drivers/iio/frequency/admv1013.c

diff --git a/drivers/iio/frequency/Kconfig b/drivers/iio/frequency/Kconfig
index XXXXXXX..XXXXXXX 100644
--- a/drivers/iio/frequency/Kconfig
+++ b/drivers/iio/frequency/Kconfig
@@ -XXX,XX +XXX,XX @@ config ADF4371
21 | |||
22 | To compile this driver as a module, choose M here: the | ||
23 | module will be called adf4371. | ||
24 | + | ||
25 | +config ADMV1013 | ||
26 | + tristate "Analog Devices ADMV1013 Microwave Upconverter" | ||
27 | + depends on SPI | ||
28 | + depends on COMMON_CLK | ||
29 | + depends on 64BIT | ||
30 | + help | ||
31 | + Say yes here to build support for Analog Devices ADMV1013 | ||
32 | + 24 GHz to 44 GHz, Wideband, Microwave Upconverter. | ||
33 | + | ||
34 | + To compile this driver as a module, choose M here: the | ||
35 | + module will be called admv1013. | ||
36 | + | ||
37 | endmenu | ||
38 | endmenu | ||
39 | diff --git a/drivers/iio/frequency/Makefile b/drivers/iio/frequency/Makefile | ||
40 | index XXXXXXX..XXXXXXX 100644 | ||
41 | --- a/drivers/iio/frequency/Makefile | ||
42 | +++ b/drivers/iio/frequency/Makefile | ||
43 | @@ -XXX,XX +XXX,XX @@ | ||
44 | obj-$(CONFIG_AD9523) += ad9523.o | ||
45 | obj-$(CONFIG_ADF4350) += adf4350.o | ||
46 | obj-$(CONFIG_ADF4371) += adf4371.o | ||
47 | +obj-$(CONFIG_ADMV1013) += admv1013.o | ||
48 | diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c | ||
49 | new file mode 100644 | ||
50 | index XXXXXXX..XXXXXXX | ||
51 | --- /dev/null | ||
52 | +++ b/drivers/iio/frequency/admv1013.c | ||
53 | @@ -XXX,XX +XXX,XX @@ | ||
54 | +// SPDX-License-Identifier: GPL-2.0-only | ||
55 | +/* | ||
56 | + * ADMV1013 driver | ||
57 | + * | ||
58 | + * Copyright 2021 Analog Devices Inc. | ||
59 | + */ | ||
60 | + | ||
61 | +#include <linux/bitfield.h> | ||
62 | +#include <linux/bitops.h> | ||
63 | +#include <linux/bits.h> | ||
64 | +#include <linux/clk.h> | ||
65 | +#include <linux/clkdev.h> | ||
66 | +#include <linux/clk-provider.h> | ||
67 | +#include <linux/device.h> | ||
68 | +#include <linux/iio/iio.h> | ||
69 | +#include <linux/module.h> | ||
70 | +#include <linux/notifier.h> | ||
71 | +#include <linux/regmap.h> | ||
72 | +#include <linux/regulator/consumer.h> | ||
73 | +#include <linux/spi/spi.h> | ||
74 | + | ||
75 | +#include <asm/unaligned.h> | ||
76 | + | ||
77 | +/* ADMV1013 Register Map */ | ||
78 | +#define ADMV1013_REG_SPI_CONTROL 0x00 | ||
79 | +#define ADMV1013_REG_ALARM 0x01 | ||
80 | +#define ADMV1013_REG_ALARM_MASKS 0x02 | ||
81 | +#define ADMV1013_REG_ENABLE 0x03 | ||
82 | +#define ADMV1013_REG_LO_AMP_I 0x05 | ||
83 | +#define ADMV1013_REG_LO_AMP_Q 0x06 | ||
84 | +#define ADMV1013_REG_OFFSET_ADJUST_I 0x07 | ||
85 | +#define ADMV1013_REG_OFFSET_ADJUST_Q 0x08 | ||
86 | +#define ADMV1013_REG_QUAD 0x09 | ||
87 | +#define ADMV1013_REG_VVA_TEMP_COMP 0x0A | ||
88 | + | ||
89 | +/* ADMV1013_REG_SPI_CONTROL Map */ | ||
90 | +#define ADMV1013_PARITY_EN_MSK BIT(15) | ||
91 | +#define ADMV1013_SPI_SOFT_RESET_MSK BIT(14) | ||
92 | +#define ADMV1013_CHIP_ID_MSK GENMASK(11, 4) | ||
93 | +#define ADMV1013_CHIP_ID 0xA | ||
94 | +#define ADMV1013_REVISION_ID_MSK GENMASK(3, 0) | ||
95 | + | ||
96 | +/* ADMV1013_REG_ALARM Map */ | ||
97 | +#define ADMV1013_PARITY_ERROR_MSK BIT(15) | ||
98 | +#define ADMV1013_TOO_FEW_ERRORS_MSK BIT(14) | ||
99 | +#define ADMV1013_TOO_MANY_ERRORS_MSK BIT(13) | ||
100 | +#define ADMV1013_ADDRESS_RANGE_ERROR_MSK BIT(12) | ||
101 | + | ||
102 | +/* ADMV1013_REG_ENABLE Map */ | ||
103 | +#define ADMV1013_VGA_PD_MSK BIT(15) | ||
104 | +#define ADMV1013_MIXER_PD_MSK BIT(14) | ||
105 | +#define ADMV1013_QUAD_PD_MSK GENMASK(13, 11) | ||
106 | +#define ADMV1013_BG_PD_MSK BIT(10) | ||
107 | +#define ADMV1013_MIXER_IF_EN_MSK BIT(7) | ||
108 | +#define ADMV1013_DET_EN_MSK BIT(5) | ||
109 | + | ||
110 | +/* ADMV1013_REG_LO_AMP_I Map */ | ||
111 | +#define ADMV1013_LOAMP_PH_ADJ_I_FINE_MSK GENMASK(13, 7) | ||
112 | +#define ADMV1013_MIXER_VGATE_MSK GENMASK(6, 0) | ||
113 | + | ||
114 | +/* ADMV1013_REG_LO_AMP_Q Map */ | ||
115 | +#define ADMV1013_LOAMP_PH_ADJ_Q_FINE_MSK GENMASK(13, 7) | ||
116 | + | ||
117 | +/* ADMV1013_REG_OFFSET_ADJUST_I Map */ | ||
118 | +#define ADMV1013_MIXER_OFF_ADJ_I_P_MSK GENMASK(15, 9) | ||
119 | +#define ADMV1013_MIXER_OFF_ADJ_I_N_MSK GENMASK(8, 2) | ||
120 | + | ||
121 | +/* ADMV1013_REG_OFFSET_ADJUST_Q Map */ | ||
122 | +#define ADMV1013_MIXER_OFF_ADJ_Q_P_MSK GENMASK(15, 9) | ||
123 | +#define ADMV1013_MIXER_OFF_ADJ_Q_N_MSK GENMASK(8, 2) | ||
124 | + | ||
125 | +/* ADMV1013_REG_QUAD Map */ | ||
126 | +#define ADMV1013_QUAD_SE_MODE_MSK GENMASK(9, 6) | ||
127 | +#define ADMV1013_QUAD_FILTERS_MSK GENMASK(3, 0) | ||
128 | + | ||
129 | +/* ADMV1013_REG_VVA_TEMP_COMP Map */ | ||
130 | +#define ADMV1013_VVA_TEMP_COMP_MSK GENMASK(15, 0) | ||
131 | + | ||
132 | +struct admv1013_state { | ||
133 | + struct spi_device *spi; | ||
134 | + struct clk *clkin; | ||
135 | + /* Protect against concurrent accesses to the device */ | ||
136 | + struct mutex lock; | ||
137 | + struct regulator *reg; | ||
138 | + struct notifier_block nb; | ||
139 | + unsigned int quad_se_mode; | ||
140 | + bool vga_pd; | ||
141 | + bool mixer_pd; | ||
142 | + bool quad_pd; | ||
143 | + bool bg_pd; | ||
144 | + bool mixer_if_en; | ||
145 | + bool det_en; | ||
146 | + u8 data[3] ____cacheline_aligned; | ||
147 | +}; | ||
148 | + | ||
149 | +static int __admv1013_spi_read(struct admv1013_state *st, unsigned int reg, | ||
150 | + unsigned int *val) | ||
151 | +{ | ||
152 | + int ret; | ||
153 | + struct spi_transfer t = {0}; | ||
154 | + | ||
155 | + st->data[0] = 0x80 | (reg << 1); | ||
156 | + st->data[1] = 0x0; | ||
157 | + st->data[2] = 0x0; | ||
158 | + | ||
159 | + t.rx_buf = &st->data[0]; | ||
160 | + t.tx_buf = &st->data[0]; | ||
161 | + t.len = 3; | ||
162 | + | ||
163 | + ret = spi_sync_transfer(st->spi, &t, 1); | ||
164 | + if (ret) | ||
165 | + return ret; | ||
166 | + | ||
167 | + *val = (get_unaligned_be24(&st->data[0]) >> 1) & GENMASK(15, 0); | ||
168 | + | ||
169 | + return ret; | ||
170 | +} | ||
171 | + | ||
172 | +static int admv1013_spi_read(struct admv1013_state *st, unsigned int reg, | ||
173 | + unsigned int *val) | ||
174 | +{ | ||
175 | + int ret; | ||
176 | + | ||
177 | + mutex_lock(&st->lock); | ||
178 | + ret = __admv1013_spi_read(st, reg, val); | ||
179 | + mutex_unlock(&st->lock); | ||
180 | + | ||
181 | + return ret; | ||
182 | +} | ||
183 | + | ||
184 | +static int __admv1013_spi_write(struct admv1013_state *st, | ||
185 | + unsigned int reg, | ||
186 | + unsigned int val) | ||
187 | +{ | ||
188 | + put_unaligned_be24((val << 1) | (reg << 17), &st->data[0]); | ||
189 | + | ||
190 | + return spi_write(st->spi, &st->data[0], 3); | ||
191 | +} | ||
192 | + | ||
193 | +static int admv1013_spi_write(struct admv1013_state *st, unsigned int reg, | ||
194 | + unsigned int val) | ||
195 | +{ | ||
196 | + int ret; | ||
197 | + | ||
198 | + mutex_lock(&st->lock); | ||
199 | + ret = __admv1013_spi_write(st, reg, val); | ||
200 | + mutex_unlock(&st->lock); | ||
201 | + | ||
202 | + return ret; | ||
203 | +} | ||
204 | + | ||
205 | +static int __admv1013_spi_update_bits(struct admv1013_state *st, unsigned int reg, | ||
206 | + unsigned int mask, unsigned int val) | ||
207 | +{ | ||
208 | + int ret; | ||
209 | + unsigned int data, temp; | ||
210 | + | ||
211 | + ret = __admv1013_spi_read(st, reg, &data); | ||
212 | + if (ret) | ||
213 | + return ret; | ||
214 | + | ||
215 | + temp = (data & ~mask) | (val & mask); | ||
216 | + | ||
217 | + return __admv1013_spi_write(st, reg, temp); | ||
218 | +} | ||
219 | + | ||
220 | +static int admv1013_spi_update_bits(struct admv1013_state *st, unsigned int reg, | ||
221 | + unsigned int mask, unsigned int val) | ||
222 | +{ | ||
223 | + int ret; | ||
224 | + | ||
225 | + mutex_lock(&st->lock); | ||
226 | + ret = __admv1013_spi_update_bits(st, reg, mask, val); | ||
227 | + mutex_unlock(&st->lock); | ||
228 | + | ||
229 | + return ret; | ||
230 | +} | ||
231 | + | ||
232 | +static int admv1013_read_raw(struct iio_dev *indio_dev, | ||
233 | + struct iio_chan_spec const *chan, | ||
234 | + int *val, int *val2, long info) | ||
235 | +{ | ||
236 | + struct admv1013_state *st = iio_priv(indio_dev); | ||
237 | + unsigned int data; | ||
238 | + int ret; | ||
239 | + | ||
240 | + switch (info) { | ||
241 | + case IIO_CHAN_INFO_OFFSET: | ||
242 | + if (chan->channel2 == IIO_MOD_I) { | ||
243 | + ret = admv1013_spi_read(st, ADMV1013_REG_OFFSET_ADJUST_I, &data); | ||
244 | + if (ret) | ||
245 | + return ret; | ||
246 | + | ||
247 | + *val = FIELD_GET(ADMV1013_MIXER_OFF_ADJ_I_P_MSK, data); | ||
248 | + *val2 = FIELD_GET(ADMV1013_MIXER_OFF_ADJ_I_N_MSK, data); | ||
249 | + } else { | ||
250 | + ret = admv1013_spi_read(st, ADMV1013_REG_OFFSET_ADJUST_Q, &data); | ||
251 | + if (ret) | ||
252 | + return ret; | ||
253 | + | ||
254 | + *val = FIELD_GET(ADMV1013_MIXER_OFF_ADJ_Q_P_MSK, data); | ||
255 | + *val2 = FIELD_GET(ADMV1013_MIXER_OFF_ADJ_Q_N_MSK, data); | ||
256 | + } | ||
257 | + | ||
258 | + return IIO_VAL_INT_MULTIPLE; | ||
259 | + case IIO_CHAN_INFO_PHASE: | ||
260 | + if (chan->channel2 == IIO_MOD_I) { | ||
261 | + ret = admv1013_spi_read(st, ADMV1013_REG_LO_AMP_I, &data); | ||
262 | + if (ret) | ||
263 | + return ret; | ||
264 | + | ||
265 | + *val = FIELD_GET(ADMV1013_LOAMP_PH_ADJ_I_FINE_MSK, data); | ||
266 | + } else { | ||
267 | + ret = admv1013_spi_read(st, ADMV1013_REG_LO_AMP_Q, &data); | ||
268 | + if (ret) | ||
269 | + return ret; | ||
270 | + | ||
271 | + *val = FIELD_GET(ADMV1013_LOAMP_PH_ADJ_Q_FINE_MSK, data); | ||
272 | + } | ||
273 | + | ||
274 | + return IIO_VAL_INT; | ||
275 | + default: | ||
276 | + return -EINVAL; | ||
277 | + } | ||
278 | +} | ||
279 | + | ||
280 | +static int admv1013_write_raw(struct iio_dev *indio_dev, | ||
281 | + struct iio_chan_spec const *chan, | ||
282 | + int val, int val2, long info) | ||
283 | +{ | ||
284 | + struct admv1013_state *st = iio_priv(indio_dev); | ||
285 | + int ret; | ||
286 | + | ||
287 | + switch (info) { | ||
288 | + case IIO_CHAN_INFO_OFFSET: | ||
289 | + val2 /= 100000; | ||
290 | + | ||
291 | + if (chan->channel2 == IIO_MOD_I) | ||
292 | + ret = admv1013_spi_update_bits(st, ADMV1013_REG_OFFSET_ADJUST_I, | ||
293 | + ADMV1013_MIXER_OFF_ADJ_I_P_MSK | | ||
294 | + ADMV1013_MIXER_OFF_ADJ_I_N_MSK, | ||
295 | + FIELD_PREP(ADMV1013_MIXER_OFF_ADJ_I_P_MSK, val) | | ||
296 | + FIELD_PREP(ADMV1013_MIXER_OFF_ADJ_I_N_MSK, val2)); | ||
297 | + else | ||
298 | + ret = admv1013_spi_update_bits(st, ADMV1013_REG_OFFSET_ADJUST_Q, | ||
299 | + ADMV1013_MIXER_OFF_ADJ_Q_P_MSK | | ||
300 | + ADMV1013_MIXER_OFF_ADJ_Q_N_MSK, | ||
301 | + FIELD_PREP(ADMV1013_MIXER_OFF_ADJ_Q_P_MSK, val) | | ||
302 | + FIELD_PREP(ADMV1013_MIXER_OFF_ADJ_Q_N_MSK, val2)); | ||
303 | + | ||
304 | + return ret; | ||
305 | + case IIO_CHAN_INFO_PHASE: | ||
306 | + if (chan->channel2 == IIO_MOD_I) | ||
307 | + return admv1013_spi_update_bits(st, ADMV1013_REG_LO_AMP_I, | ||
308 | + ADMV1013_LOAMP_PH_ADJ_I_FINE_MSK, | ||
309 | + FIELD_PREP(ADMV1013_LOAMP_PH_ADJ_I_FINE_MSK, val)); | ||
310 | + else | ||
311 | + return admv1013_spi_update_bits(st, ADMV1013_REG_LO_AMP_Q, | ||
312 | + ADMV1013_LOAMP_PH_ADJ_Q_FINE_MSK, | ||
313 | + FIELD_PREP(ADMV1013_LOAMP_PH_ADJ_Q_FINE_MSK, val)); | ||
314 | + default: | ||
315 | + return -EINVAL; | ||
316 | + } | ||
317 | +} | ||
318 | + | ||
319 | +static int admv1013_update_quad_filters(struct admv1013_state *st) | ||
320 | +{ | ||
321 | + unsigned int filt_raw; | ||
322 | + u64 rate = clk_get_rate(st->clkin); | ||
323 | + | ||
324 | + if (rate >= 5400000000 && rate <= 7000000000) | ||
325 | + filt_raw = 15; | ||
326 | + else if (rate >= 5400000000 && rate <= 8000000000) | ||
327 | + filt_raw = 10; | ||
328 | + else if (rate >= 6600000000 && rate <= 9200000000) | ||
329 | + filt_raw = 5; | ||
330 | + else | ||
331 | + filt_raw = 0; | ||
332 | + | ||
333 | + return __admv1013_spi_update_bits(st, ADMV1013_REG_QUAD, | ||
334 | + ADMV1013_QUAD_FILTERS_MSK, | ||
335 | + FIELD_PREP(ADMV1013_QUAD_FILTERS_MSK, filt_raw)); | ||
336 | +} | ||
337 | + | ||
338 | +static int admv1013_update_mixer_vgate(struct admv1013_state *st) | ||
339 | +{ | ||
340 | + unsigned int vcm, mixer_vgate; | ||
341 | + | ||
342 | + vcm = regulator_get_voltage(st->reg); | ||
343 | + | ||
344 | + if (vcm >= 0 && vcm < 1800000) | ||
345 | + mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100; | ||
346 | + else if (vcm > 1800000 && vcm < 2600000) | ||
347 | + mixer_vgate = (2375 * vcm / 1000000 + 125) / 100; | ||
348 | + else | ||
349 | + return -EINVAL; | ||
350 | + | ||
351 | + return __admv1013_spi_update_bits(st, ADMV1013_REG_LO_AMP_I, | ||
352 | + ADMV1013_MIXER_VGATE_MSK, | ||
353 | + FIELD_PREP(ADMV1013_MIXER_VGATE_MSK, mixer_vgate)); | ||
354 | +} | ||
355 | + | ||
356 | +static int admv1013_reg_access(struct iio_dev *indio_dev, | ||
357 | + unsigned int reg, | ||
358 | + unsigned int write_val, | ||
359 | + unsigned int *read_val) | ||
360 | +{ | ||
361 | + struct admv1013_state *st = iio_priv(indio_dev); | ||
362 | + int ret; | ||
363 | + | ||
364 | + if (read_val) | ||
365 | + ret = admv1013_spi_read(st, reg, read_val); | ||
366 | + else | ||
367 | + ret = admv1013_spi_write(st, reg, write_val); | ||
368 | + | ||
369 | + return ret; | ||
370 | +} | ||
371 | + | ||
372 | +static const struct iio_info admv1013_info = { | ||
373 | + .read_raw = admv1013_read_raw, | ||
374 | + .write_raw = admv1013_write_raw, | ||
375 | + .debugfs_reg_access = &admv1013_reg_access, | ||
376 | +}; | ||
377 | + | ||
378 | +static int admv1013_freq_change(struct notifier_block *nb, unsigned long action, void *data) | ||
379 | +{ | ||
380 | + struct admv1013_state *st = container_of(nb, struct admv1013_state, nb); | ||
381 | + int ret; | ||
382 | + | ||
383 | + if (action == POST_RATE_CHANGE) { | ||
384 | + mutex_lock(&st->lock); | ||
385 | + ret = notifier_from_errno(admv1013_update_quad_filters(st)); | ||
386 | + mutex_unlock(&st->lock); | ||
387 | + return ret; | ||
388 | + } | ||
389 | + | ||
390 | + return NOTIFY_OK; | ||
391 | +} | ||
392 | + | ||
393 | +static void admv1013_clk_notifier_unreg(void *data) | ||
394 | +{ | ||
395 | + struct admv1013_state *st = data; | ||
396 | + | ||
397 | + clk_notifier_unregister(st->clkin, &st->nb); | ||
398 | +} | ||
399 | + | ||
400 | +#define ADMV1013_CHAN(_channel, rf_comp) { \ | ||
401 | + .type = IIO_ALTVOLTAGE, \ | ||
402 | + .modified = 1, \ | ||
403 | + .output = 1, \ | ||
404 | + .indexed = 1, \ | ||
405 | + .channel2 = IIO_MOD_##rf_comp, \ | ||
406 | + .channel = _channel, \ | ||
407 | + .info_mask_separate = BIT(IIO_CHAN_INFO_PHASE) | \ | ||
408 | + BIT(IIO_CHAN_INFO_OFFSET) \ | ||
409 | + } | ||
410 | + | ||
411 | +static const struct iio_chan_spec admv1013_channels[] = { | ||
412 | + ADMV1013_CHAN(0, I), | ||
413 | + ADMV1013_CHAN(0, Q), | ||
414 | +}; | ||
415 | + | ||
416 | +static int admv1013_init(struct admv1013_state *st) | ||
417 | +{ | ||
418 | + int ret; | ||
419 | + unsigned int chip_id, enable_reg, enable_reg_msk; | ||
420 | + struct spi_device *spi = st->spi; | ||
421 | + | ||
422 | + /* Perform a software reset */ | ||
423 | + ret = __admv1013_spi_update_bits(st, ADMV1013_REG_SPI_CONTROL, | ||
424 | + ADMV1013_SPI_SOFT_RESET_MSK, | ||
425 | + FIELD_PREP(ADMV1013_SPI_SOFT_RESET_MSK, 1)); | ||
426 | + if (ret) | ||
427 | + return ret; | ||
428 | + | ||
429 | + ret = __admv1013_spi_update_bits(st, ADMV1013_REG_SPI_CONTROL, | ||
430 | + ADMV1013_SPI_SOFT_RESET_MSK, | ||
431 | + FIELD_PREP(ADMV1013_SPI_SOFT_RESET_MSK, 0)); | ||
432 | + if (ret) | ||
433 | + return ret; | ||
434 | + | ||
435 | + ret = __admv1013_spi_read(st, ADMV1013_REG_SPI_CONTROL, &chip_id); | ||
436 | + if (ret) | ||
437 | + return ret; | ||
438 | + | ||
439 | + chip_id = FIELD_GET(ADMV1013_CHIP_ID_MSK, chip_id); | ||
440 | + if (chip_id != ADMV1013_CHIP_ID) { | ||
441 | + dev_err(&spi->dev, "Invalid Chip ID.\n"); | ||
442 | + return -EINVAL; | ||
443 | + } | ||
444 | + | ||
445 | + ret = __admv1013_spi_write(st, ADMV1013_REG_VVA_TEMP_COMP, 0xE700); | ||
446 | + if (ret) | ||
447 | + return ret; | ||
448 | + | ||
449 | + ret = __admv1013_spi_update_bits(st, ADMV1013_REG_QUAD, | ||
450 | + ADMV1013_QUAD_SE_MODE_MSK, | ||
451 | + FIELD_PREP(ADMV1013_QUAD_SE_MODE_MSK, st->quad_se_mode)); | ||
452 | + if (ret) | ||
453 | + return ret; | ||
454 | + | ||
455 | + ret = admv1013_update_mixer_vgate(st); | ||
456 | + if (ret) | ||
457 | + return ret; | ||
458 | + | ||
459 | + ret = admv1013_update_quad_filters(st); | ||
460 | + if (ret) | ||
461 | + return ret; | ||
462 | + | ||
463 | + enable_reg_msk = ADMV1013_VGA_PD_MSK | | ||
464 | + ADMV1013_MIXER_PD_MSK | | ||
465 | + ADMV1013_QUAD_PD_MSK | | ||
466 | + ADMV1013_BG_PD_MSK | | ||
467 | + ADMV1013_MIXER_IF_EN_MSK | | ||
468 | + ADMV1013_DET_EN_MSK; | ||
469 | + | ||
470 | + enable_reg = FIELD_PREP(ADMV1013_VGA_PD_MSK, st->vga_pd) | | ||
471 | + FIELD_PREP(ADMV1013_MIXER_PD_MSK, st->mixer_pd) | | ||
472 | + FIELD_PREP(ADMV1013_QUAD_PD_MSK, st->quad_pd ? 7 : 0) | | ||
473 | + FIELD_PREP(ADMV1013_BG_PD_MSK, st->bg_pd) | | ||
474 | + FIELD_PREP(ADMV1013_MIXER_IF_EN_MSK, st->mixer_if_en) | | ||
475 | + FIELD_PREP(ADMV1013_DET_EN_MSK, st->det_en); | ||
476 | + | ||
477 | + return __admv1013_spi_update_bits(st, ADMV1013_REG_ENABLE, enable_reg_msk, enable_reg); | ||
478 | +} | ||
479 | + | ||
480 | +static void admv1013_clk_disable(void *data) | ||
481 | +{ | ||
482 | + clk_disable_unprepare(data); | ||
483 | +} | ||
484 | + | ||
485 | +static void admv1013_reg_disable(void *data) | ||
486 | +{ | ||
487 | + regulator_disable(data); | ||
488 | +} | ||
489 | + | ||
490 | +static void admv1013_powerdown(void *data) | ||
491 | +{ | ||
492 | + unsigned int enable_reg, enable_reg_msk; | ||
493 | + | ||
494 | + /* Disable all components in the Enable Register */ | ||
495 | + enable_reg_msk = ADMV1013_VGA_PD_MSK | | ||
496 | + ADMV1013_MIXER_PD_MSK | | ||
497 | + ADMV1013_QUAD_PD_MSK | | ||
498 | + ADMV1013_BG_PD_MSK | | ||
499 | + ADMV1013_MIXER_IF_EN_MSK | | ||
500 | + ADMV1013_DET_EN_MSK; | ||
501 | + | ||
502 | + enable_reg = FIELD_PREP(ADMV1013_VGA_PD_MSK, 1) | | ||
503 | + FIELD_PREP(ADMV1013_MIXER_PD_MSK, 1) | | ||
504 | + FIELD_PREP(ADMV1013_QUAD_PD_MSK, 7) | | ||
505 | + FIELD_PREP(ADMV1013_BG_PD_MSK, 1) | | ||
506 | + FIELD_PREP(ADMV1013_MIXER_IF_EN_MSK, 0) | | ||
507 | + FIELD_PREP(ADMV1013_DET_EN_MSK, 0); | ||
508 | + | ||
509 | + admv1013_spi_update_bits(data, ADMV1013_REG_ENABLE, enable_reg_msk, enable_reg); | ||
510 | +} | ||
511 | + | ||
512 | +static int admv1013_properties_parse(struct admv1013_state *st) | ||
513 | +{ | ||
514 | + int ret; | ||
515 | + struct spi_device *spi = st->spi; | ||
516 | + | ||
517 | + st->vga_pd = device_property_read_bool(&spi->dev, "adi,vga-pd"); | ||
518 | + st->mixer_pd = device_property_read_bool(&spi->dev, "adi,mixer-pd"); | ||
519 | + st->quad_pd = device_property_read_bool(&spi->dev, "adi,quad-pd"); | ||
520 | + st->bg_pd = device_property_read_bool(&spi->dev, "adi,bg-pd"); | ||
521 | + st->mixer_if_en = device_property_read_bool(&spi->dev, "adi,mixer-if-en"); | ||
522 | + st->det_en = device_property_read_bool(&spi->dev, "adi,det-en"); | ||
523 | + | ||
524 | + ret = device_property_read_u32(&spi->dev, "adi,quad-se-mode", &st->quad_se_mode); | ||
525 | + if (ret) | ||
526 | + st->quad_se_mode = 12; | ||
527 | + | ||
528 | + st->reg = devm_regulator_get(&spi->dev, "vcm"); | ||
529 | + if (IS_ERR(st->reg)) | ||
530 | + return dev_err_probe(&spi->dev, PTR_ERR(st->reg), | ||
531 | + "failed to get the common-mode voltage\n"); | ||
532 | + | ||
533 | + st->clkin = devm_clk_get(&spi->dev, "lo_in"); | ||
534 | + if (IS_ERR(st->clkin)) | ||
535 | + return dev_err_probe(&spi->dev, PTR_ERR(st->clkin), | ||
536 | + "failed to get the LO input clock\n"); | ||
537 | + | ||
538 | + return 0; | ||
539 | +} | ||
540 | + | ||
541 | +static int admv1013_probe(struct spi_device *spi) | ||
542 | +{ | ||
543 | + struct iio_dev *indio_dev; | ||
544 | + struct admv1013_state *st; | ||
545 | + int ret; | ||
546 | + | ||
547 | + indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*st)); | ||
548 | + if (!indio_dev) | ||
549 | + return -ENOMEM; | ||
550 | + | ||
551 | + st = iio_priv(indio_dev); | ||
552 | + | ||
553 | + indio_dev->dev.parent = &spi->dev; | ||
554 | + indio_dev->info = &admv1013_info; | ||
555 | + indio_dev->name = "admv1013"; | ||
556 | + indio_dev->channels = admv1013_channels; | ||
557 | + indio_dev->num_channels = ARRAY_SIZE(admv1013_channels); | ||
558 | + | ||
559 | + st->spi = spi; | ||
560 | + | ||
561 | + ret = admv1013_properties_parse(st); | ||
562 | + if (ret) | ||
563 | + return ret; | ||
564 | + | ||
565 | + ret = regulator_enable(st->reg); | ||
566 | + if (ret) { | ||
567 | + dev_err(&spi->dev, "Failed to enable specified Common-Mode Voltage!\n"); | ||
568 | + return ret; | ||
569 | + } | ||
570 | + | ||
571 | + ret = devm_add_action_or_reset(&spi->dev, admv1013_reg_disable, | ||
572 | + st->reg); | ||
573 | + if (ret) | ||
574 | + return ret; | ||
575 | + | ||
576 | + ret = clk_prepare_enable(st->clkin); | ||
577 | + if (ret) | ||
578 | + return ret; | ||
579 | + | ||
580 | + ret = devm_add_action_or_reset(&spi->dev, admv1013_clk_disable, st->clkin); | ||
581 | + if (ret) | ||
582 | + return ret; | ||
583 | + | ||
584 | + st->nb.notifier_call = admv1013_freq_change; | ||
585 | + ret = clk_notifier_register(st->clkin, &st->nb); | ||
586 | + if (ret) | ||
587 | + return ret; | ||
588 | + | ||
589 | + ret = devm_add_action_or_reset(&spi->dev, admv1013_clk_notifier_unreg, st); | ||
590 | + if (ret) | ||
591 | + return ret; | ||
592 | + | ||
593 | + mutex_init(&st->lock); | ||
594 | + | ||
595 | + ret = admv1013_init(st); | ||
596 | + if (ret) { | ||
597 | + dev_err(&spi->dev, "admv1013 init failed\n"); | ||
598 | + return ret; | ||
599 | + } | ||
600 | + | ||
601 | + ret = devm_add_action_or_reset(&spi->dev, admv1013_powerdown, st); | ||
602 | + if (ret) | ||
603 | + return ret; | ||
604 | + | ||
605 | + return devm_iio_device_register(&spi->dev, indio_dev); | ||
606 | +} | ||
607 | + | ||
608 | +static const struct spi_device_id admv1013_id[] = { | ||
609 | + { "admv1013", 0}, | ||
610 | + {} | ||
611 | +}; | ||
612 | +MODULE_DEVICE_TABLE(spi, admv1013_id); | ||
613 | + | ||
614 | +static const struct of_device_id admv1013_of_match[] = { | ||
615 | + { .compatible = "adi,admv1013" }, | ||
616 | + {}, | ||
617 | +}; | ||
618 | +MODULE_DEVICE_TABLE(of, admv1013_of_match); | ||
619 | + | ||
620 | +static struct spi_driver admv1013_driver = { | ||
621 | + .driver = { | ||
622 | + .name = "admv1013", | ||
623 | + .of_match_table = admv1013_of_match, | ||
624 | + }, | ||
625 | + .probe = admv1013_probe, | ||
626 | + .id_table = admv1013_id, | ||
627 | +}; | ||
628 | +module_spi_driver(admv1013_driver); | ||
629 | + | ||
630 | +MODULE_AUTHOR("Antoniu Miclaus <antoniu.miclaus@analog.com>");
631 | +MODULE_DESCRIPTION("Analog Devices ADMV1013"); | ||
632 | +MODULE_LICENSE("GPL v2"); | ||
633 | -- | ||
634 | 2.33.1 | ||
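
For context, a minimal, hypothetical userspace example of exercising the
I/Q phase adjustment this driver exposes; the sysfs attribute name is an
assumption derived from the usual IIO naming convention for modified
output altvoltage channels, not something stated in the patch:

#include <stdio.h>

/*
 * Hypothetical example: write a raw LO phase-adjust value for the I
 * rail through the IIO sysfs interface. The attribute name follows
 * the out_altvoltage<index>_<modifier>_<info> convention and the
 * device index is assumed to be 0.
 */
int main(void)
{
	FILE *f = fopen("/sys/bus/iio/devices/iio:device0/out_altvoltage0_i_phase", "w");

	if (!f)
		return 1;
	fprintf(f, "%d\n", 32);	/* raw value for the 7-bit fine phase adjust field */
	fclose(f);
	return 0;
}
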

sched/fair: Proactive idle balance using push mechanism

Proactively try to push tasks to one of the CPUs set in the
"nohz.idle_cpus_mask" from the push callback.

pick_next_pushable_fair_task() is taken from Vincent's series [1] as is,
but the locking rules in push_fair_task() have been relaxed to release
the local rq lock after dequeuing the task and to reacquire it after
pushing the task to the idle target.

double_lock_balance(), as used in RT, seems necessary to maintain strict
priority ordering; however, that may not be necessary for fair tasks.

Link: https://lore.kernel.org/all/20250302210539.1563190-6-vincent.guittot@linaro.org/ [1]
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
 kernel/sched/fair.c | 59 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

18 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/kernel/sched/fair.c | ||
21 | +++ b/kernel/sched/fair.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static inline int has_pushable_tasks(struct rq *rq) | ||
23 | return !plist_head_empty(&rq->cfs.pushable_tasks); | ||
24 | } | ||
25 | |||
26 | +static struct task_struct *pick_next_pushable_fair_task(struct rq *rq) | ||
27 | +{ | ||
28 | + struct task_struct *p; | ||
29 | + | ||
30 | + if (!has_pushable_tasks(rq)) | ||
31 | + return NULL; | ||
32 | + | ||
33 | + p = plist_first_entry(&rq->cfs.pushable_tasks, | ||
34 | + struct task_struct, pushable_tasks); | ||
35 | + | ||
36 | + WARN_ON_ONCE(rq->cpu != task_cpu(p)); | ||
37 | + WARN_ON_ONCE(task_current(rq, p)); | ||
38 | + WARN_ON_ONCE(p->nr_cpus_allowed <= 1); | ||
39 | + WARN_ON_ONCE(!task_on_rq_queued(p)); | ||
40 | + | ||
41 | + /* | ||
42 | + * Remove task from the pushable list as we try only once after that | ||
43 | + * the task has been put back in enqueued list. | ||
44 | + */ | ||
45 | + plist_del(&p->pushable_tasks, &rq->cfs.pushable_tasks); | ||
46 | + | ||
47 | + return p; | ||
48 | +} | ||
49 | + | ||
50 | +static void fair_add_pushable_task(struct rq *rq, struct task_struct *p); | ||
51 | +static void attach_one_task(struct rq *rq, struct task_struct *p); | ||
52 | + | ||
53 | /* | ||
54 | * See if the non running fair tasks on this rq can be sent on other CPUs | ||
55 | * that fits better with their profile. | ||
56 | */ | ||
57 | static bool push_fair_task(struct rq *rq) | ||
58 | { | ||
59 | + struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask); | ||
60 | + struct task_struct *p = pick_next_pushable_fair_task(rq); | ||
61 | + int cpu, this_cpu = cpu_of(rq); | ||
62 | + | ||
63 | + if (!p) | ||
64 | + return false; | ||
65 | + | ||
66 | + if (!cpumask_and(cpus, nohz.idle_cpus_mask, housekeeping_cpumask(HK_TYPE_KERNEL_NOISE))) | ||
67 | + goto requeue; | ||
68 | + | ||
69 | + if (!cpumask_and(cpus, cpus, p->cpus_ptr)) | ||
70 | + goto requeue; | ||
71 | + | ||
72 | + for_each_cpu_wrap(cpu, cpus, this_cpu + 1) { | ||
73 | + struct rq *target_rq; | ||
74 | + | ||
75 | + if (!idle_cpu(cpu)) | ||
76 | + continue; | ||
77 | + | ||
78 | + target_rq = cpu_rq(cpu); | ||
79 | + deactivate_task(rq, p, 0); | ||
80 | + set_task_cpu(p, cpu); | ||
81 | + raw_spin_rq_unlock(rq); | ||
82 | + | ||
83 | + attach_one_task(target_rq, p); | ||
84 | + raw_spin_rq_lock(rq); | ||
85 | + | ||
86 | + return true; | ||
87 | + } | ||
88 | + | ||
89 | +requeue: | ||
90 | + fair_add_pushable_task(rq, p); | ||
91 | return false; | ||
92 | } | ||
93 | |||
94 | -- | ||
2.34.1
dt-bindings: iio: frequency: add admv1013 doc

Add device tree bindings for the ADMV1013 Upconverter.

Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
---
 .../bindings/iio/frequency/adi,admv1013.yaml | 110 ++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml

diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml
new file mode 100644
index XXXXXXX..XXXXXXX
12 | --- /dev/null | ||
13 | +++ b/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml | ||
14 | @@ -XXX,XX +XXX,XX @@ | ||
15 | +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) | ||
16 | +%YAML 1.2 | ||
17 | +--- | ||
18 | +$id: http://devicetree.org/schemas/iio/frequency/adi,admv1013.yaml# | ||
19 | +$schema: http://devicetree.org/meta-schemas/core.yaml# | ||
20 | + | ||
21 | +title: ADMV1013 Microwave Upconverter | ||
22 | + | ||
23 | +maintainers: | ||
24 | + - Antoniu Miclaus <antoniu.miclaus@analog.com> | ||
25 | + | ||
26 | +description: | | ||
27 | + Wideband, microwave upconverter optimized for point to point microwave | ||
28 | + radio designs operating in the 24 GHz to 44 GHz frequency range. | ||
29 | + | ||
30 | + https://www.analog.com/en/products/admv1013.html | ||
31 | + | ||
32 | +properties: | ||
33 | + compatible: | ||
34 | + enum: | ||
35 | + - adi,admv1013 | ||
36 | + | ||
37 | + reg: | ||
38 | + maxItems: 1 | ||
39 | + | ||
40 | + spi-max-frequency: | ||
41 | + maximum: 1000000 | ||
42 | + | ||
43 | + clocks: | ||
44 | + description: | ||
45 | + Definition of the external clock. | ||
46 | + minItems: 1 | ||
47 | + | ||
48 | + clock-names: | ||
49 | + items: | ||
50 | + - const: lo_in | ||
51 | + | ||
52 | + clock-output-names: | ||
53 | + maxItems: 1 | ||
54 | + | ||
55 | + vcm-supply: | ||
56 | + description: | ||
57 | + Analog voltage regulator. | ||
58 | + | ||
59 | + adi,vga-pd: | ||
60 | + description: | ||
61 | + Power Down the Voltage Gain Amplifier Circuit. | ||
62 | + type: boolean | ||
63 | + | ||
64 | + adi,mixer-pd: | ||
65 | + description: | ||
66 | + Power Down the Mixer Circuit. | ||
67 | + type: boolean | ||
68 | + | ||
69 | + adi,quad-pd: | ||
70 | + description: | ||
71 | + Power Down the Quadrupler. | ||
72 | + type: boolean | ||
73 | + | ||
74 | + adi,bg-pd: | ||
75 | + description: | ||
76 | + Power Down the Transmitter Band Gap. | ||
77 | + type: boolean | ||
78 | + | ||
79 | + adi,mixer-if-en: | ||
80 | + description: | ||
81 | + Enable the Intermediate Frequency Mode. | ||
82 | + type: boolean | ||
83 | + | ||
84 | + adi,det-en: | ||
85 | + description: | ||
86 | + Enable the Envelope Detector. | ||
87 | + type: boolean | ||
88 | + | ||
89 | + adi,quad-se-mode: | ||
90 | + description: | ||
91 | + Switch the LO path from differential to single-ended operation. | ||
92 | + $ref: /schemas/types.yaml#/definitions/uint32 | ||
93 | + enum: [6, 9, 12] | ||
94 | + | ||
95 | + '#clock-cells': | ||
96 | + const: 0 | ||
97 | + | ||
98 | +required: | ||
99 | + - compatible | ||
100 | + - reg | ||
101 | + - clocks | ||
102 | + - clock-names | ||
103 | + - vcm-supply | ||
104 | + | ||
105 | +additionalProperties: false | ||
106 | + | ||
107 | +examples: | ||
108 | + - | | ||
109 | + spi { | ||
110 | + #address-cells = <1>; | ||
111 | + #size-cells = <0>; | ||
112 | + admv1013@0{ | ||
113 | + compatible = "adi,admv1013"; | ||
114 | + reg = <0>; | ||
115 | + spi-max-frequency = <1000000>; | ||
116 | + clocks = <&admv1013_lo>; | ||
117 | + clock-names = "lo_in"; | ||
118 | + vcm-supply = <&vcm>; | ||
119 | + adi,quad-se-mode = <12>; | ||
120 | + adi,mixer-if-en; | ||
121 | + adi,det-en; | ||
122 | + }; | ||
123 | + }; | ||
124 | +... | ||
125 | -- | ||
126 | 2.33.1 | ||

sched/fair: Rework inter-NUMA newidle balancing

With the introduction of "overloaded_mask" in the sched_domain_shared
struct, it is now possible to scan through the CPUs that contain
pushable tasks that could be run on the CPU going newly idle.

Redesign the inter-NUMA newidle balancing to opportunistically pull a
task to the CPU going idle from the overloaded CPUs only.

The search starts from sd_llc and moves up until sd_numa. Since
"overloaded_mask" is per-LLC, each LLC domain is visited individually
using the per-CPU sd_llc struct shared by all CPUs in an LLC.

Once visited for one CPU, all CPUs in that LLC are marked visited and
the search resumes for the LLCs of the CPUs that remain to be visited.

detach_one_task() was used instead of pick_next_pushable_fair_task()
since detach_one_task() also considers the CPU affinity of the task
being pulled, as opposed to pick_next_pushable_fair_task() which returns
the first pushable task.

Since each iteration over the overloaded_mask rechecks the idle state of
the CPU doing newidle balance, the initial gating factor based on
"rq->avg_idle" has been removed.

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
 kernel/sched/fair.c | 129 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 117 insertions(+), 12 deletions(-)

29 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/kernel/sched/fair.c | ||
32 | +++ b/kernel/sched/fair.c | ||
33 | @@ -XXX,XX +XXX,XX @@ static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle | ||
34 | static inline void nohz_newidle_balance(struct rq *this_rq) { } | ||
35 | #endif /* CONFIG_NO_HZ_COMMON */ | ||
36 | |||
37 | +static inline bool sched_newidle_continue_balance(struct rq *rq) | ||
38 | +{ | ||
39 | + return !rq->nr_running && !rq->ttwu_pending; | ||
40 | +} | ||
41 | + | ||
42 | +static inline int sched_newidle_pull_overloaded(struct sched_domain *sd, | ||
43 | + struct rq *this_rq, | ||
44 | + int *continue_balancing) | ||
45 | +{ | ||
46 | + struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask); | ||
47 | + int cpu, this_cpu = cpu_of(this_rq); | ||
48 | + struct sched_domain *sd_parent; | ||
49 | + struct lb_env env = { | ||
50 | + .dst_cpu = this_cpu, | ||
51 | + .dst_rq = this_rq, | ||
52 | + .idle = CPU_NEWLY_IDLE, | ||
53 | + }; | ||
54 | + | ||
55 | + | ||
56 | + cpumask_and(cpus, sched_domain_span(sd), cpu_active_mask); | ||
57 | + | ||
58 | +next_domain: | ||
59 | + env.sd = sd; | ||
60 | + /* Allow migrating cache_hot tasks too. */ | ||
61 | + sd->nr_balance_failed = sd->cache_nice_tries + 1; | ||
62 | + | ||
63 | + for_each_cpu_wrap(cpu, cpus, this_cpu) { | ||
64 | + struct sched_domain_shared *sd_share; | ||
65 | + struct cpumask *overloaded_mask; | ||
66 | + struct sched_domain *cpu_llc; | ||
67 | + int overloaded_cpu; | ||
68 | + | ||
69 | + cpu_llc = rcu_dereference(per_cpu(sd_llc, cpu)); | ||
70 | + if (!cpu_llc) | ||
71 | + break; | ||
72 | + | ||
73 | + sd_share = cpu_llc->shared; | ||
74 | + if (!sd_share) | ||
75 | + break; | ||
76 | + | ||
77 | + overloaded_mask = sd_share->overloaded_mask; | ||
78 | + if (!overloaded_mask) | ||
79 | + break; | ||
80 | + | ||
81 | + for_each_cpu_wrap(overloaded_cpu, overloaded_mask, this_cpu + 1) { | ||
82 | + struct rq *overloaded_rq = cpu_rq(overloaded_cpu); | ||
83 | + struct task_struct *p = NULL; | ||
84 | + | ||
85 | + if (sched_newidle_continue_balance(this_rq)) { | ||
86 | + *continue_balancing = 0; | ||
87 | + return 0; | ||
88 | + } | ||
89 | + | ||
90 | + /* Quick peek to find if pushable tasks exist. */ | ||
91 | + if (!has_pushable_tasks(overloaded_rq)) | ||
92 | + continue; | ||
93 | + | ||
94 | + scoped_guard (rq_lock, overloaded_rq) { | ||
95 | + update_rq_clock(overloaded_rq); | ||
96 | + | ||
97 | + if (!has_pushable_tasks(overloaded_rq)) | ||
98 | + break; | ||
99 | + | ||
100 | + env.src_cpu = overloaded_cpu; | ||
101 | + env.src_rq = overloaded_rq; | ||
102 | + | ||
103 | + p = detach_one_task(&env); | ||
104 | + } | ||
105 | + | ||
106 | + if (!p) | ||
107 | + continue; | ||
108 | + | ||
109 | + attach_one_task(this_rq, p); | ||
110 | + return 1; | ||
111 | + } | ||
112 | + | ||
113 | + cpumask_andnot(cpus, cpus, sched_domain_span(cpu_llc)); | ||
114 | + } | ||
115 | + | ||
116 | + if (sched_newidle_continue_balance(this_rq)) { | ||
117 | + *continue_balancing = 0; | ||
118 | + return 0; | ||
119 | + } | ||
120 | + | ||
121 | + sd_parent = sd->parent; | ||
122 | + if (sd_parent && !(sd_parent->flags & SD_NUMA)) { | ||
123 | + cpumask_andnot(cpus, sched_domain_span(sd_parent), sched_domain_span(sd)); | ||
124 | + sd = sd_parent; | ||
125 | + goto next_domain; | ||
126 | + } | ||
127 | + | ||
128 | + return 0; | ||
129 | +} | ||
130 | + | ||
131 | /* | ||
132 | * sched_balance_newidle is called by schedule() if this_cpu is about to become | ||
133 | * idle. Attempts to pull tasks from other CPUs. | ||
134 | @@ -XXX,XX +XXX,XX @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) | ||
135 | u64 t0, t1, curr_cost = 0; | ||
136 | struct sched_domain *sd; | ||
137 | int pulled_task = 0; | ||
138 | + u64 domain_cost; | ||
139 | |||
140 | update_misfit_status(NULL, this_rq); | ||
141 | |||
142 | @@ -XXX,XX +XXX,XX @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) | ||
143 | rq_unpin_lock(this_rq, rf); | ||
144 | |||
145 | rcu_read_lock(); | ||
146 | - sd = rcu_dereference_check_sched_domain(this_rq->sd); | ||
147 | - | ||
148 | - if (!get_rd_overloaded(this_rq->rd) || | ||
149 | - (sd && this_rq->avg_idle < sd->max_newidle_lb_cost)) { | ||
150 | - | ||
151 | - if (sd) | ||
152 | - update_next_balance(sd, &next_balance); | ||
153 | + if (!get_rd_overloaded(this_rq->rd)) { | ||
154 | rcu_read_unlock(); | ||
155 | - | ||
156 | goto out; | ||
157 | } | ||
158 | rcu_read_unlock(); | ||
159 | |||
160 | raw_spin_rq_unlock(this_rq); | ||
161 | |||
162 | + rcu_read_lock(); | ||
163 | t0 = sched_clock_cpu(this_cpu); | ||
164 | - sched_balance_update_blocked_averages(this_cpu); | ||
165 | |||
166 | - rcu_read_lock(); | ||
167 | - for_each_domain(this_cpu, sd) { | ||
168 | - u64 domain_cost; | ||
169 | + sd = rcu_dereference(per_cpu(sd_llc, this_cpu)); | ||
170 | + if (sd) { | ||
171 | + pulled_task = sched_newidle_pull_overloaded(sd, this_rq, &continue_balancing); | ||
172 | + | ||
173 | + t1 = sched_clock_cpu(this_cpu); | ||
174 | + domain_cost = t1 - t0; | ||
175 | + curr_cost += domain_cost; | ||
176 | + t0 = t1; | ||
177 | |||
178 | + if (pulled_task || !continue_balancing) | ||
179 | + goto skip_numa; | ||
180 | + } | ||
181 | + | ||
182 | + sched_balance_update_blocked_averages(this_cpu); | ||
183 | + | ||
184 | + sd = rcu_dereference(per_cpu(sd_numa, this_cpu)); | ||
185 | + while (sd) { | ||
186 | update_next_balance(sd, &next_balance); | ||
187 | |||
188 | if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) | ||
189 | @@ -XXX,XX +XXX,XX @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) | ||
190 | */ | ||
191 | if (pulled_task || !continue_balancing) | ||
192 | break; | ||
193 | + | ||
194 | + sd = sd->parent; | ||
195 | } | ||
196 | + | ||
197 | +skip_numa: | ||
198 | rcu_read_unlock(); | ||
199 | |||
200 | raw_spin_rq_lock(this_rq); | ||
201 | -- | ||
2.34.1