[RFC PATCH 24/24] sched/time: Introduce CLOCK_THREAD_DVFS_ID

Peter Zijlstra posted 24 patches 1 year, 4 months ago
[RFC PATCH 24/24] sched/time: Introduce CLOCK_THREAD_DVFS_ID
Posted by Peter Zijlstra 1 year, 4 months ago
In order to measure thread time in a DVFS world, introduce
CLOCK_THREAD_DVFS_ID -- a copy of CLOCK_THREAD_CPUTIME_ID that slows
down with both DVFS scaling and CPU capacity.

The clock does *NOT* support setting timers.

Useful for both SCHED_DEADLINE and the newly introduced
sched_attr::sched_runtime usage for SCHED_NORMAL.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/posix-timers_types.h |    5 ++--
 include/linux/sched.h              |    1 
 include/linux/sched/cputime.h      |    3 ++
 include/uapi/linux/time.h          |    1 
 kernel/sched/core.c                |   40 +++++++++++++++++++++++++++++++++++++
 kernel/sched/fair.c                |    8 +++++--
 kernel/time/posix-cpu-timers.c     |   16 +++++++++++++-
 kernel/time/posix-timers.c         |    1 
 kernel/time/posix-timers.h         |    1 
 9 files changed, 71 insertions(+), 5 deletions(-)

--- a/include/linux/posix-timers_types.h
+++ b/include/linux/posix-timers_types.h
@@ -13,9 +13,9 @@
  *
  * Bit 2 indicates whether a cpu clock refers to a thread or a process.
  *
- * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
+ * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or DVSF=3
  *
- * A clockid is invalid if bits 2, 1, and 0 are all set.
+ * (DVFS is PERTHREAD only)
  */
 #define CPUCLOCK_PID(clock)		((pid_t) ~((clock) >> 3))
 #define CPUCLOCK_PERTHREAD(clock) \
@@ -27,6 +27,7 @@
 #define CPUCLOCK_PROF		0
 #define CPUCLOCK_VIRT		1
 #define CPUCLOCK_SCHED		2
+#define CPUCLOCK_DVFS		3
 #define CPUCLOCK_MAX		3
 #define CLOCKFD			CPUCLOCK_MAX
 #define CLOCKFD_MASK		(CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -550,6 +550,7 @@ struct sched_entity {
 	u64				exec_start;
 	u64				sum_exec_runtime;
 	u64				prev_sum_exec_runtime;
+	u64				sum_dvfs_runtime;
 	u64				vruntime;
 	s64				vlag;
 	u64				slice;
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -180,4 +180,7 @@ static inline void prev_cputime_init(str
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
 
+extern unsigned long long
+task_sched_dvfs_runtime(struct task_struct *task);
+
 #endif /* _LINUX_SCHED_CPUTIME_H */
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -62,6 +62,7 @@ struct timezone {
  */
 #define CLOCK_SGI_CYCLE			10
 #define CLOCK_TAI			11
+#define CLOCK_THREAD_DVFS_ID		12
 
 #define MAX_CLOCKS			16
 #define CLOCKS_MASK			(CLOCK_REALTIME | CLOCK_MONOTONIC)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4551,6 +4551,7 @@ static void __sched_fork(unsigned long c
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
+	p->se.sum_dvfs_runtime		= 0;
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
 	p->se.vlag			= 0;
@@ -5632,6 +5633,45 @@ unsigned long long task_sched_runtime(st
 	task_rq_unlock(rq, p, &rf);
 
 	return ns;
+}
+
+unsigned long long task_sched_dvfs_runtime(struct task_struct *p)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+	u64 ns;
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+	/*
+	 * 64-bit doesn't need locks to atomically read a 64-bit value.
+	 * So we have an optimization chance when the task's delta_exec is 0.
+	 * Reading ->on_cpu is racy, but this is ok.
+	 *
+	 * If we race with it leaving CPU, we'll take a lock. So we're correct.
+	 * If we race with it entering CPU, unaccounted time is 0. This is
+	 * indistinguishable from the read occurring a few cycles earlier.
+	 * If we see ->on_cpu without ->on_rq, the task is leaving, and has
+	 * been accounted, so we're correct here as well.
+	 */
+	if (!p->on_cpu || !task_on_rq_queued(p))
+		return p->se.sum_dvfs_runtime;
+#endif
+
+	rq = task_rq_lock(p, &rf);
+	/*
+	 * Must be ->curr _and_ ->on_rq.  If dequeued, we would
+	 * project cycles that may never be accounted to this
+	 * thread, breaking clock_gettime().
+	 */
+	if (task_current(rq, p) && task_on_rq_queued(p)) {
+		prefetch_curr_exec_start(p);
+		update_rq_clock(rq);
+		p->sched_class->update_curr(rq);
+	}
+	ns = p->se.sum_dvfs_runtime;
+	task_rq_unlock(rq, p, &rf);
+
+	return ns;
 }
 
 #ifdef CONFIG_SCHED_DEBUG
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1118,15 +1118,19 @@ static void update_tg_load_avg(struct cf
 static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
 {
 	u64 now = rq_clock_task(rq);
-	s64 delta_exec;
+	s64 delta_exec, delta_dvfs;
 
-	delta_exec = now - curr->exec_start;
+	delta_dvfs = delta_exec = now - curr->exec_start;
 	if (unlikely(delta_exec <= 0))
 		return delta_exec;
 
 	curr->exec_start = now;
 	curr->sum_exec_runtime += delta_exec;
 
+	delta_dvfs = cap_scale(delta_dvfs, arch_scale_freq_capacity(cpu_of(rq)));
+	delta_dvfs = cap_scale(delta_dvfs, arch_scale_cpu_capacity(cpu_of(rq)));
+	curr->sum_dvfs_runtime += delta_dvfs;
+
 	if (schedstat_enabled()) {
 		struct sched_statistics *stats;
 
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -164,7 +164,7 @@ posix_cpu_clock_getres(const clockid_t w
 	if (!error) {
 		tp->tv_sec = 0;
 		tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
-		if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
+		if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_SCHED) {
 			/*
 			 * If sched_clock is using a cycle counter, we
 			 * don't have any idea of its true resolution
@@ -198,6 +198,9 @@ static u64 cpu_clock_sample(const clocki
 	if (clkid == CPUCLOCK_SCHED)
 		return task_sched_runtime(p);
 
+	if (clkid == CPUCLOCK_DVFS)
+		return task_sched_dvfs_runtime(p);
+
 	task_cputime(p, &utime, &stime);
 
 	switch (clkid) {
@@ -1628,6 +1631,7 @@ static long posix_cpu_nsleep_restart(str
 
 #define PROCESS_CLOCK	make_process_cpuclock(0, CPUCLOCK_SCHED)
 #define THREAD_CLOCK	make_thread_cpuclock(0, CPUCLOCK_SCHED)
+#define THREAD_DVFS_CLOCK make_thread_cpuclock(0, CPUCLOCK_DVFS)
 
 static int process_cpu_clock_getres(const clockid_t which_clock,
 				    struct timespec64 *tp)
@@ -1664,6 +1668,11 @@ static int thread_cpu_timer_create(struc
 	timer->it_clock = THREAD_CLOCK;
 	return posix_cpu_timer_create(timer);
 }
+static int thread_dvfs_cpu_clock_get(const clockid_t which_clock,
+				struct timespec64 *tp)
+{
+	return posix_cpu_clock_get(THREAD_DVFS_CLOCK, tp);
+}
 
 const struct k_clock clock_posix_cpu = {
 	.clock_getres		= posix_cpu_clock_getres,
@@ -1690,3 +1699,8 @@ const struct k_clock clock_thread = {
 	.clock_get_timespec	= thread_cpu_clock_get,
 	.timer_create		= thread_cpu_timer_create,
 };
+
+const struct k_clock clock_thread_dvfs = {
+	.clock_getres		= thread_cpu_clock_getres,
+	.clock_get_timespec	= thread_dvfs_cpu_clock_get,
+};
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1516,6 +1516,7 @@ static const struct k_clock * const posi
 	[CLOCK_MONOTONIC]		= &clock_monotonic,
 	[CLOCK_PROCESS_CPUTIME_ID]	= &clock_process,
 	[CLOCK_THREAD_CPUTIME_ID]	= &clock_thread,
+	[CLOCK_THREAD_DVFS_ID]		= &clock_thread_dvfs,
 	[CLOCK_MONOTONIC_RAW]		= &clock_monotonic_raw,
 	[CLOCK_REALTIME_COARSE]		= &clock_realtime_coarse,
 	[CLOCK_MONOTONIC_COARSE]	= &clock_monotonic_coarse,
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -34,6 +34,7 @@ extern const struct k_clock clock_posix_
 extern const struct k_clock clock_posix_dynamic;
 extern const struct k_clock clock_process;
 extern const struct k_clock clock_thread;
+extern const struct k_clock clock_thread_dvfs;
 extern const struct k_clock alarm_clock;
 
 int posix_timer_event(struct k_itimer *timr, int si_private);
Re: [RFC PATCH 24/24] sched/time: Introduce CLOCK_THREAD_DVFS_ID
Posted by Christian Loehle 1 year, 3 months ago
On 7/27/24 11:27, Peter Zijlstra wrote:
> In order to measure thread time in a DVFS world, introduce
> CLOCK_THREAD_DVFS_ID -- a copy of CLOCK_THREAD_CPUTIME_ID that slows
> down with both DVFS scaling and CPU capacity.
> 
> The clock does *NOT* support setting timers.
> 
> Useful for both SCHED_DEADLINE and the newly introduced
> sched_attr::sched_runtime usage for SCHED_NORMAL.
> 

What will this look like in practice, then?
Is it up to userspace to adjust sched_runtime to capacity/dvfs
accordingly every time it changes? I guess not.
Will sched_attr::sched_runtime be for CPUCLOCK_DVFS by default?
I assume that would be a uapi change?
Do we need an additional flag in sched_attr to specify the clock
to be measured against?

> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  include/linux/posix-timers_types.h |    5 ++--
>  include/linux/sched.h              |    1 
>  include/linux/sched/cputime.h      |    3 ++
>  include/uapi/linux/time.h          |    1 
>  kernel/sched/core.c                |   40 +++++++++++++++++++++++++++++++++++++
>  kernel/sched/fair.c                |    8 +++++--
>  kernel/time/posix-cpu-timers.c     |   16 +++++++++++++-
>  kernel/time/posix-timers.c         |    1 
>  kernel/time/posix-timers.h         |    1 
>  9 files changed, 71 insertions(+), 5 deletions(-)
> 
> --- a/include/linux/posix-timers_types.h
> +++ b/include/linux/posix-timers_types.h
> @@ -13,9 +13,9 @@
>   *
>   * Bit 2 indicates whether a cpu clock refers to a thread or a process.
>   *
> - * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
> + * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or DVSF=3

s/DVSF/DVFS

> [snip]
Re: [RFC PATCH 24/24] sched/time: Introduce CLOCK_THREAD_DVFS_ID
Posted by Juri Lelli 1 year, 4 months ago
Hi Peter,

On 27/07/24 12:27, Peter Zijlstra wrote:
> In order to measure thread time in a DVFS world, introduce
> CLOCK_THREAD_DVFS_ID -- a copy of CLOCK_THREAD_CPUTIME_ID that slows
> down with both DVFS scaling and CPU capacity.
> 
> The clock does *NOT* support setting timers.
> 
> Useful for both SCHED_DEADLINE and the newly introduced
> sched_attr::sched_runtime usage for SCHED_NORMAL.

Just so I'm sure I understand, this would be useful for estimating the
runtime needs of a (also DEADLINE) task when DVFS is enabled, right?

Thanks,
Juri
Re: [RFC PATCH 24/24] sched/time: Introduce CLOCK_THREAD_DVFS_ID
Posted by Peter Zijlstra 1 year, 4 months ago
On Mon, Jul 29, 2024 at 09:53:17AM +0200, Juri Lelli wrote:
> Hi Peter,
> 
> On 27/07/24 12:27, Peter Zijlstra wrote:
> > In order to measure thread time in a DVFS world, introduce
> > CLOCK_THREAD_DVFS_ID -- a copy of CLOCK_THREAD_CPUTIME_ID that slows
> > down with both DVFS scaling and CPU capacity.
> > 
> > The clock does *NOT* support setting timers.
> > 
> > Useful for both SCHED_DEADLINE and the newly introduced
> > sched_attr::sched_runtime usage for SCHED_NORMAL.
> 
> Just so I'm sure I understand, this would be useful for estimating the
> runtime needs of a (also DEADLINE) task when DVFS is enabled, right?

Correct, DVFS or biggie-smalls CPUs with mixed capacities.
Re: [RFC PATCH 24/24] sched/time: Introduce CLOCK_THREAD_DVFS_ID
Posted by Thomas Gleixner 1 year, 4 months ago
On Sat, Jul 27 2024 at 12:27, Peter Zijlstra wrote:
> In order to measure thread time in a DVFS world, introduce
> CLOCK_THREAD_DVFS_ID -- a copy of CLOCK_THREAD_CPUTIME_ID that slows
> down with both DVFS scaling and CPU capacity.
>
> The clock does *NOT* support setting timers.

That's not the only limitation. See below.

> Useful for both SCHED_DEADLINE and the newly introduced
> sched_attr::sched_runtime usage for SCHED_NORMAL.

Can this please have an explanation about the usage of the previously
reserved value of 0x7 in the lower 3 bits?

>   *
>   * Bit 2 indicates whether a cpu clock refers to a thread or a process.
>   *
> - * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
> + * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or DVSF=3
>   *
> - * A clockid is invalid if bits 2, 1, and 0 are all set.
> + * (DVFS is PERTHREAD only)

This drops the information about the FD usage. Something like:

/*
 * Bit fields within a clockid:
 *
 * Bit 31:3 hold either a pid or a file descriptor.
 *
 * Bit 2  Bit 1  Bit 0
 *   0      0      0     Per process	CPUCLOCK_PROF
 *   0      0      1     Per process	CPUCLOCK_VIRT
 *   0      1      0     Per process	CPUCLOCK_SCHED
 *   0      1      1     Posixclock FD	CLOCKFD
 *   1      0      0     Per thread	CPUCLOCK_PROF
 *   1      0      1     Per thread	CPUCLOCK_VIRT
 *   1      1      0     Per thread	CPUCLOCK_SCHED
 *   1      1      1     Per thread	CPUCLOCK_DVFS
 *
 * CPUCLOCK_DVFS is per thread only and shares the type code in Bit 1:0
 * with CLOCKFD. CLOCKFD uses a file descriptor to access dynamically
 * registered POSIX clocks (e.g. PTP hardware clocks).
 */

should be clear enough, no?

But, all of this is wishful thinking because the provided implementation
only works for:

      sys_clock_getres(CLOCK_THREAD_DVFS_ID, ...)

which falls back to thread_cpu_clock_getres().

The variant which has the TID encoded in bit 31:3 and the type in bit
2:0 fails the test in pid_for_clock():

        if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
		return NULL;

Worse for sys_clock_gettime(). That fails in both cases for the very
same reason.

See the uncompiled delta patch below for a cure of that and the rest of
my comments.

>   #define CPUCLOCK_PROF		0
>   #define CPUCLOCK_VIRT		1
>   #define CPUCLOCK_SCHED		2
>  +#define CPUCLOCK_DVFS		3
>   #define CPUCLOCK_MAX		3
>   #define CLOCKFD			CPUCLOCK_MAX
>   #define CLOCKFD_MASK		(CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)

With that DVFS addition CPUCLOCK_MAX is misleading at best. See delta
patch.

> +
> +	rq = task_rq_lock(p, &rf);
> +	/*
> +	 * Must be ->curr _and_ ->on_rq.  If dequeued, we would
> +	 * project cycles that may never be accounted to this
> +	 * thread, breaking clock_gettime().

Must be? For what? I assume you want to say:

     Update the runtime if the task is the current task and on the
     runqueue. The latter is important because if current is dequeued,
     ....

> +	 */
> +	if (task_current(rq, p) && task_on_rq_queued(p)) {
> +		prefetch_curr_exec_start(p);
> +		update_rq_clock(rq);
> +		p->sched_class->update_curr(rq);
> +	}
> +	ns = p->se.sum_dvfs_runtime;
> +	task_rq_unlock(rq, p, &rf);
> @@ -1664,6 +1668,11 @@ static int thread_cpu_timer_create(struc
>  	timer->it_clock = THREAD_CLOCK;
>  	return posix_cpu_timer_create(timer);
>  }
> +static int thread_dvfs_cpu_clock_get(const clockid_t which_clock,
> +				struct timespec64 *tp)

Please align the second line properly with the argument in the first line.

Thanks,

        tglx
---

--- a/include/linux/posix-timers_types.h
+++ b/include/linux/posix-timers_types.h
@@ -9,27 +9,42 @@
 /*
  * Bit fields within a clockid:
  *
- * The most significant 29 bits hold either a pid or a file descriptor.
+ * Bit 31:3 hold either a PID/TID or a file descriptor.
  *
- * Bit 2 indicates whether a cpu clock refers to a thread or a process.
+ * Bit 2  Bit 1  Bit 0
+ *   0      0      0     Per process	CPUCLOCK_PROF
+ *   0      0      1     Per process	CPUCLOCK_VIRT
+ *   0      1      0     Per process	CPUCLOCK_SCHED
+ *   0      1      1     Posixclock FD	CLOCKFD
+ *   1      0      0     Per thread	CPUCLOCK_PROF
+ *   1      0      1     Per thread	CPUCLOCK_VIRT
+ *   1      1      0     Per thread	CPUCLOCK_SCHED
+ *   1      1      1     Per thread	CPUCLOCK_DVFS
  *
- * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or DVSF=3
- *
- * (DVFS is PERTHREAD only)
+ * CPUCLOCK_DVFS is per thread only and shares the type code in Bit 1:0
+ * with CLOCKFD. CLOCKFD uses a file descriptor to access dynamically
+ * registered POSIX clocks (e.g. PTP hardware clocks).
  */
+
 #define CPUCLOCK_PID(clock)		((pid_t) ~((clock) >> 3))
-#define CPUCLOCK_PERTHREAD(clock) \
-	(((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
+#define CPUCLOCK_PERTHREAD(clock)	(((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
 
-#define CPUCLOCK_PERTHREAD_MASK	4
-#define CPUCLOCK_WHICH(clock)	((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
-#define CPUCLOCK_CLOCK_MASK	3
 #define CPUCLOCK_PROF		0
 #define CPUCLOCK_VIRT		1
 #define CPUCLOCK_SCHED		2
-#define CPUCLOCK_DVFS		3
-#define CPUCLOCK_MAX		3
-#define CLOCKFD			CPUCLOCK_MAX
+#define CPUCLOCK_SAMPLE_MAX	(CPUCLOCK_SCHED + 1)
+
+#define CPUCLOCK_CLOCK_MASK	3
+#define CPUCLOCK_PERTHREAD_MASK	4
+#define CPUCLOCK_WHICH(clock)	((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
+
+/*
+ * CPUCLOCK_DVFS and CLOCKFD share the type code in bit 1:0. CPUCLOCK_DVFS
+ * does not belong to the sampling clocks and does not allow timers to be
+ * armed on it.
+ */
+#define CPUCLOCK_DVFS		CPUCLOCK_SAMPLE_MAX
+#define CLOCKFD			CPUCLOCK_DVFS
 #define CLOCKFD_MASK		(CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
 
 #ifdef CONFIG_POSIX_TIMERS
@@ -55,7 +70,7 @@ struct posix_cputimer_base {
  * Used in task_struct and signal_struct
  */
 struct posix_cputimers {
-	struct posix_cputimer_base	bases[CPUCLOCK_MAX];
+	struct posix_cputimer_base	bases[CPUCLOCK_SAMPLE_MAX];
 	unsigned int			timers_active;
 	unsigned int			expiry_active;
 };
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5413,9 +5413,10 @@ unsigned long long task_sched_dvfs_runti
 
 	rq = task_rq_lock(p, &rf);
 	/*
-	 * Must be ->curr _and_ ->on_rq.  If dequeued, we would
-	 * project cycles that may never be accounted to this
-	 * thread, breaking clock_gettime().
+	 * Update the runtime if the task is the current task and on the
+	 * runqueue. The latter is important because if current is
+	 * dequeued, we would project cycles that may never be accounted to
+	 * this thread, breaking clock_gettime().
 	 */
 	if (task_current(rq, p) && task_on_rq_queued(p)) {
 		prefetch_curr_exec_start(p);
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -54,13 +54,13 @@ int update_rlimit_cpu(struct task_struct
 /*
  * Functions for validating access to tasks.
  */
-static struct pid *pid_for_clock(const clockid_t clock, bool gettime)
+static struct pid *__pid_for_clock(const clockid_t clock, const clockid_t maxclock, bool gettime)
 {
 	const bool thread = !!CPUCLOCK_PERTHREAD(clock);
 	const pid_t upid = CPUCLOCK_PID(clock);
 	struct pid *pid;
 
-	if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
+	if (CPUCLOCK_WHICH(clock) > maxclock)
 		return NULL;
 
 	/*
@@ -94,12 +94,17 @@ static struct pid *pid_for_clock(const c
 	return pid_has_task(pid, PIDTYPE_TGID) ? pid : NULL;
 }
 
+static inline struct pid *pid_for_clock(const clockid_t clock, bool gettime)
+{
+	return __pid_for_clock(clock, CPUCLOCK_SCHED, gettime);
+}
+
 static inline int validate_clock_permissions(const clockid_t clock)
 {
 	int ret;
 
 	rcu_read_lock();
-	ret = pid_for_clock(clock, false) ? 0 : -EINVAL;
+	ret = __pid_for_clock(clock, CPUCLOCK_DVFS, false) ? 0 : -EINVAL;
 	rcu_read_unlock();
 
 	return ret;
@@ -344,7 +349,7 @@ static u64 cpu_clock_sample_group(const
 {
 	struct thread_group_cputimer *cputimer = &p->signal->cputimer;
 	struct posix_cputimers *pct = &p->signal->posix_cputimers;
-	u64 samples[CPUCLOCK_MAX];
+	u64 samples[CPUCLOCK_SAMPLE_MAX];
 
 	if (!READ_ONCE(pct->timers_active)) {
 		if (start)
@@ -365,7 +370,7 @@ static int posix_cpu_clock_get(const clo
 	u64 t;
 
 	rcu_read_lock();
-	tsk = pid_task(pid_for_clock(clock, true), clock_pid_type(clock));
+	tsk = pid_task(__pid_for_clock(clock, CPUCLOCK_DVFS, true), clock_pid_type(clock));
 	if (!tsk) {
 		rcu_read_unlock();
 		return -EINVAL;
@@ -864,7 +869,7 @@ static void collect_posix_cputimers(stru
 	struct posix_cputimer_base *base = pct->bases;
 	int i;
 
-	for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
+	for (i = 0; i < CPUCLOCK_SAMPLE_MAX; i++, base++) {
 		base->nextevt = collect_timerqueue(&base->tqhead, firing,
 						    samples[i]);
 	}
@@ -901,7 +906,7 @@ static void check_thread_timers(struct t
 				struct list_head *firing)
 {
 	struct posix_cputimers *pct = &tsk->posix_cputimers;
-	u64 samples[CPUCLOCK_MAX];
+	u64 samples[CPUCLOCK_SAMPLE_MAX];
 	unsigned long soft;
 
 	if (dl_task(tsk))
@@ -979,7 +984,7 @@ static void check_process_timers(struct
 {
 	struct signal_struct *const sig = tsk->signal;
 	struct posix_cputimers *pct = &sig->posix_cputimers;
-	u64 samples[CPUCLOCK_MAX];
+	u64 samples[CPUCLOCK_SAMPLE_MAX];
 	unsigned long soft;
 
 	/*
@@ -1098,7 +1103,7 @@ task_cputimers_expired(const u64 *sample
 {
 	int i;
 
-	for (i = 0; i < CPUCLOCK_MAX; i++) {
+	for (i = 0; i < CPUCLOCK_SAMPLE_MAX; i++) {
 		if (samples[i] >= pct->bases[i].nextevt)
 			return true;
 	}
@@ -1121,7 +1126,7 @@ static inline bool fastpath_timer_check(
 	struct signal_struct *sig;
 
 	if (!expiry_cache_is_inactive(pct)) {
-		u64 samples[CPUCLOCK_MAX];
+		u64 samples[CPUCLOCK_SAMPLE_MAX];
 
 		task_sample_cputime(tsk, samples);
 		if (task_cputimers_expired(samples, pct))
@@ -1146,7 +1151,7 @@ static inline bool fastpath_timer_check(
 	 * delays with signals actually getting sent are expected.
 	 */
 	if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) {
-		u64 samples[CPUCLOCK_MAX];
+		u64 samples[CPUCLOCK_SAMPLE_MAX];
 
 		proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic,
 					   samples);
@@ -1669,7 +1674,7 @@ static int thread_cpu_timer_create(struc
 	return posix_cpu_timer_create(timer);
 }
 static int thread_dvfs_cpu_clock_get(const clockid_t which_clock,
-				struct timespec64 *tp)
+				     struct timespec64 *tp)
 {
 	return posix_cpu_clock_get(THREAD_DVFS_CLOCK, tp);
 }