[PATCH v2 04/13] arm64/arch_timer: Provide noinstr sched_clock_read() functions

Peter Zijlstra posted 13 patches 2 years, 8 months ago
[PATCH v2 04/13] arm64/arch_timer: Provide noinstr sched_clock_read() functions
Posted by Peter Zijlstra 2 years, 8 months ago
With the intent to provide local_clock_noinstr(), a variant of
local_clock() that's safe to be called from noinstr code (with the
assumption that any such code will already be non-preemptible),
prepare for things by providing a noinstr sched_clock_read() function.

Specifically, preempt_enable_*() calls out to schedule(), which upsets
noinstr validation efforts.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/arm64/include/asm/arch_timer.h  |    8 ----
 drivers/clocksource/arm_arch_timer.c |   60 ++++++++++++++++++++++++++---------
 2 files changed, 47 insertions(+), 21 deletions(-)

--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -88,13 +88,7 @@ static inline notrace u64 arch_timer_rea
 
 #define arch_timer_reg_read_stable(reg)					\
 	({								\
-		u64 _val;						\
-									\
-		preempt_disable_notrace();				\
-		_val = erratum_handler(read_ ## reg)();			\
-		preempt_enable_notrace();				\
-									\
-		_val;							\
+		erratum_handler(read_ ## reg)();			\
 	})
 
 /*
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -191,22 +191,40 @@ u32 arch_timer_reg_read(int access, enum
 	return val;
 }
 
-static notrace u64 arch_counter_get_cntpct_stable(void)
+static noinstr u64 _arch_counter_get_cntpct_stable(void)
 {
 	return __arch_counter_get_cntpct_stable();
 }
 
-static notrace u64 arch_counter_get_cntpct(void)
+static notrace u64 arch_counter_get_cntpct_stable(void)
+{
+	u64 val;
+	preempt_disable_notrace();
+	val = __arch_counter_get_cntpct_stable();
+	preempt_enable_notrace();
+	return val;
+}
+
+static noinstr u64 arch_counter_get_cntpct(void)
 {
 	return __arch_counter_get_cntpct();
 }
 
-static notrace u64 arch_counter_get_cntvct_stable(void)
+static noinstr u64 _arch_counter_get_cntvct_stable(void)
 {
 	return __arch_counter_get_cntvct_stable();
 }
 
-static notrace u64 arch_counter_get_cntvct(void)
+static notrace u64 arch_counter_get_cntvct_stable(void)
+{
+	u64 val;
+	preempt_disable_notrace();
+	val = __arch_counter_get_cntvct_stable();
+	preempt_enable_notrace();
+	return val;
+}
+
+static noinstr u64 arch_counter_get_cntvct(void)
 {
 	return __arch_counter_get_cntvct();
 }
@@ -753,14 +771,14 @@ static int arch_timer_set_next_event_phy
 	return 0;
 }
 
-static u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
+static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
 {
 	u32 cnt_lo, cnt_hi, tmp_hi;
 
 	do {
-		cnt_hi = readl_relaxed(t->base + offset_lo + 4);
-		cnt_lo = readl_relaxed(t->base + offset_lo);
-		tmp_hi = readl_relaxed(t->base + offset_lo + 4);
+		cnt_hi = __raw_readl(t->base + offset_lo + 4);
+		cnt_lo = __raw_readl(t->base + offset_lo);
+		tmp_hi = __raw_readl(t->base + offset_lo + 4);
 	} while (cnt_hi != tmp_hi);
 
 	return ((u64) cnt_hi << 32) | cnt_lo;
@@ -1060,7 +1078,7 @@ bool arch_timer_evtstrm_available(void)
 	return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
 }
 
-static u64 arch_counter_get_cntvct_mem(void)
+static noinstr u64 arch_counter_get_cntvct_mem(void)
 {
 	return arch_counter_get_cnt_mem(arch_timer_mem, CNTVCT_LO);
 }
@@ -1074,6 +1092,13 @@ struct arch_timer_kvm_info *arch_timer_g
 
 static void __init arch_counter_register(unsigned type)
 {
+	/*
+	 * Default to cp15 based access because arm64 uses this function for
+	 * sched_clock() before DT is probed and the cp15 method is guaranteed
+	 * to exist on arm64. arm doesn't use this before DT is probed so even
+	 * if we don't have the cp15 accessors we won't have a problem.
+	 */
+	u64 (*scr)(void) = arch_counter_get_cntvct;
 	u64 start_count;
 	int width;
 
@@ -1083,21 +1108,28 @@ static void __init arch_counter_register
 
 		if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
 		    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
-			if (arch_timer_counter_has_wa())
+			if (arch_timer_counter_has_wa()) {
 				rd = arch_counter_get_cntvct_stable;
-			else
+				scr = _arch_counter_get_cntvct_stable;
+			} else {
 				rd = arch_counter_get_cntvct;
+				scr = arch_counter_get_cntvct;
+			}
 		} else {
-			if (arch_timer_counter_has_wa())
+			if (arch_timer_counter_has_wa()) {
 				rd = arch_counter_get_cntpct_stable;
-			else
+				scr = _arch_counter_get_cntpct_stable;
+			} else {
 				rd = arch_counter_get_cntpct;
+				scr = arch_counter_get_cntpct;
+			}
 		}
 
 		arch_timer_read_counter = rd;
 		clocksource_counter.vdso_clock_mode = vdso_default;
 	} else {
 		arch_timer_read_counter = arch_counter_get_cntvct_mem;
+		scr = arch_counter_get_cntvct_mem;
 	}
 
 	width = arch_counter_get_width();
@@ -1113,7 +1145,7 @@ static void __init arch_counter_register
 	timecounter_init(&arch_timer_kvm_info.timecounter,
 			 &cyclecounter, start_count);
 
-	sched_clock_register(arch_timer_read_counter, width, arch_timer_rate);
+	sched_clock_register(scr, width, arch_timer_rate);
 }
 
 static void arch_timer_stop(struct clock_event_device *clk)
Re: [PATCH v2 04/13] arm64/arch_timer: Provide noinstr sched_clock_read() functions
Posted by Valentin Schneider 2 years, 8 months ago
On 19/05/23 12:21, Peter Zijlstra wrote:
> With the intent to provide local_clock_noinstr(), a variant of
> local_clock() that's safe to be called from noinstr code (with the
> assumption that any such code will already be non-preemptible),
> prepare for things by providing a noinstr sched_clock_read() function.
>
> Specifically, preempt_enable_*() calls out to schedule(), which upsets
> noinstr validation efforts.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/arm64/include/asm/arch_timer.h  |    8 ----
>  drivers/clocksource/arm_arch_timer.c |   60 ++++++++++++++++++++++++++---------
>  2 files changed, 47 insertions(+), 21 deletions(-)
>
> --- a/arch/arm64/include/asm/arch_timer.h
> +++ b/arch/arm64/include/asm/arch_timer.h
> @@ -88,13 +88,7 @@ static inline notrace u64 arch_timer_rea
>
>  #define arch_timer_reg_read_stable(reg)					\
>       ({								\
> -		u64 _val;						\
> -									\
> -		preempt_disable_notrace();				\
> -		_val = erratum_handler(read_ ## reg)();			\
> -		preempt_enable_notrace();				\
> -									\
> -		_val;							\
> +		erratum_handler(read_ ## reg)();			\
>       })
>
>  /*
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -191,22 +191,40 @@ u32 arch_timer_reg_read(int access, enum
>       return val;
>  }
>
> -static notrace u64 arch_counter_get_cntpct_stable(void)
> +static noinstr u64 _arch_counter_get_cntpct_stable(void)
>  {
>       return __arch_counter_get_cntpct_stable();
>  }
>

I tripped over the underscores when reviewing this :( This must be
getting close to the symbol length limit, but could we give this a helpful
prefix or suffix? raw_* or *_noinstr?

> @@ -1074,6 +1092,13 @@ struct arch_timer_kvm_info *arch_timer_g
>
>  static void __init arch_counter_register(unsigned type)
>  {
> +	/*
> +	 * Default to cp15 based access because arm64 uses this function for
> +	 * sched_clock() before DT is probed and the cp15 method is guaranteed
> +	 * to exist on arm64. arm doesn't use this before DT is probed so even
> +	 * if we don't have the cp15 accessors we won't have a problem.
> +	 */
> +	u64 (*scr)(void) = arch_counter_get_cntvct;

So this bit sent me on a little spelunking session :-)

From a control flow perspective the initialization isn't required, but then
I looked into the comment and found it comes from the
arch_timer_read_counter() definition... Which itself doesn't get used by
sched_clock() until the sched_clock_register() below!

So AFAICT that comment was true as of

  220069945b29 ("clocksource: arch_timer: Add support for memory mapped timers")

but not after a commit that came 2 months later:

  65cd4f6c99c1 ("arch_timer: Move to generic sched_clock framework")

which IIUC made arm/arm64 follow the default approach of using the
jiffy-based sched_clock() before probing DT/ACPI and registering a "proper"
sched_clock.

All of that to say: the comment about arch_timer_read_counter() vs early
sched_clock() doesn't apply anymore, but I think we need to keep its
initalization around for stuff like get_cycles(). This initialization here
should be OK to put to the bin, though.
Re: [PATCH v2 04/13] arm64/arch_timer: Provide noinstr sched_clock_read() functions
Posted by Peter Zijlstra 2 years, 8 months ago
On Wed, May 24, 2023 at 05:40:47PM +0100, Valentin Schneider wrote:
> On 19/05/23 12:21, Peter Zijlstra wrote:
> > With the intent to provide local_clock_noinstr(), a variant of
> > local_clock() that's safe to be called from noinstr code (with the
> > assumption that any such code will already be non-preemptible),
> > prepare for things by providing a noinstr sched_clock_read() function.
> >
> > Specifically, preempt_enable_*() calls out to schedule(), which upsets
> > noinstr validation efforts.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  arch/arm64/include/asm/arch_timer.h  |    8 ----
> >  drivers/clocksource/arm_arch_timer.c |   60 ++++++++++++++++++++++++++---------
> >  2 files changed, 47 insertions(+), 21 deletions(-)
> >
> > --- a/arch/arm64/include/asm/arch_timer.h
> > +++ b/arch/arm64/include/asm/arch_timer.h
> > @@ -88,13 +88,7 @@ static inline notrace u64 arch_timer_rea
> >
> >  #define arch_timer_reg_read_stable(reg)					\
> >       ({								\
> > -		u64 _val;						\
> > -									\
> > -		preempt_disable_notrace();				\
> > -		_val = erratum_handler(read_ ## reg)();			\
> > -		preempt_enable_notrace();				\
> > -									\
> > -		_val;							\
> > +		erratum_handler(read_ ## reg)();			\
> >       })
> >
> >  /*
> > --- a/drivers/clocksource/arm_arch_timer.c
> > +++ b/drivers/clocksource/arm_arch_timer.c
> > @@ -191,22 +191,40 @@ u32 arch_timer_reg_read(int access, enum
> >       return val;
> >  }
> >
> > -static notrace u64 arch_counter_get_cntpct_stable(void)
> > +static noinstr u64 _arch_counter_get_cntpct_stable(void)
> >  {
> >       return __arch_counter_get_cntpct_stable();
> >  }
> >
> 
> I tripped over the underscores when reviewing this :( This must be
> getting close to the symbol length limit, but could we give this a helpful
> prefix or suffix? raw_* or *_noinstr?
> 
> > @@ -1074,6 +1092,13 @@ struct arch_timer_kvm_info *arch_timer_g
> >
> >  static void __init arch_counter_register(unsigned type)
> >  {
> > +	/*
> > +	 * Default to cp15 based access because arm64 uses this function for
> > +	 * sched_clock() before DT is probed and the cp15 method is guaranteed
> > +	 * to exist on arm64. arm doesn't use this before DT is probed so even
> > +	 * if we don't have the cp15 accessors we won't have a problem.
> > +	 */
> > +	u64 (*scr)(void) = arch_counter_get_cntvct;
> 
> So this bit sent me on a little spelunking session :-)
> 
> From a control flow perspective the initialization isn't required, but then
> I looked into the comment and found it comes from the
> arch_timer_read_counter() definition... Which itself doesn't get used by
> sched_clock() until the sched_clock_register() below!
> 
> So AFAICT that comment was true as of
> 
>   220069945b29 ("clocksource: arch_timer: Add support for memory mapped timers")
> 
> but not after a commit that came 2 months later:
> 
>   65cd4f6c99c1 ("arch_timer: Move to generic sched_clock framework")
> 
> which IIUC made arm/arm64 follow the default approach of using the
> jiffy-based sched_clock() before probing DT/ACPI and registering a "proper"
> sched_clock.
> 
> All of that to say: the comment about arch_timer_read_counter() vs early
> sched_clock() doesn't apply anymore, but I think we need to keep its
> initalization around for stuff like get_cycles(). This initialization here
> should be OK to put to the bin, though.

Something like the below folded in then?

---
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -191,7 +191,7 @@ u32 arch_timer_reg_read(int access, enum
 	return val;
 }
 
-static noinstr u64 _arch_counter_get_cntpct_stable(void)
+static noinstr u64 raw_counter_get_cntpct_stable(void)
 {
 	return __arch_counter_get_cntpct_stable();
 }
@@ -210,7 +210,7 @@ static noinstr u64 arch_counter_get_cntp
 	return __arch_counter_get_cntpct();
 }
 
-static noinstr u64 _arch_counter_get_cntvct_stable(void)
+static noinstr u64 raw_counter_get_cntvct_stable(void)
 {
 	return __arch_counter_get_cntvct_stable();
 }
@@ -1092,13 +1092,7 @@ struct arch_timer_kvm_info *arch_timer_g
 
 static void __init arch_counter_register(unsigned type)
 {
-	/*
-	 * Default to cp15 based access because arm64 uses this function for
-	 * sched_clock() before DT is probed and the cp15 method is guaranteed
-	 * to exist on arm64. arm doesn't use this before DT is probed so even
-	 * if we don't have the cp15 accessors we won't have a problem.
-	 */
-	u64 (*scr)(void) = arch_counter_get_cntvct;
+	u64 (*scr)(void);
 	u64 start_count;
 	int width;
 
@@ -1110,7 +1104,7 @@ static void __init arch_counter_register
 		    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
 			if (arch_timer_counter_has_wa()) {
 				rd = arch_counter_get_cntvct_stable;
-				scr = _arch_counter_get_cntvct_stable;
+				scr = raw_counter_get_cntvct_stable;
 			} else {
 				rd = arch_counter_get_cntvct;
 				scr = arch_counter_get_cntvct;
@@ -1118,7 +1112,7 @@ static void __init arch_counter_register
 		} else {
 			if (arch_timer_counter_has_wa()) {
 				rd = arch_counter_get_cntpct_stable;
-				scr = _arch_counter_get_cntpct_stable;
+				scr = raw_counter_get_cntpct_stable;
 			} else {
 				rd = arch_counter_get_cntpct;
 				scr = arch_counter_get_cntpct;
Re: [PATCH v2 04/13] arm64/arch_timer: Provide noinstr sched_clock_read() functions
Posted by Valentin Schneider 2 years, 8 months ago
On 02/06/23 13:54, Peter Zijlstra wrote:
> On Wed, May 24, 2023 at 05:40:47PM +0100, Valentin Schneider wrote:
>>
>> So this bit sent me on a little spelunking session :-)
>>
>> From a control flow perspective the initialization isn't required, but then
>> I looked into the comment and found it comes from the
>> arch_timer_read_counter() definition... Which itself doesn't get used by
>> sched_clock() until the sched_clock_register() below!
>>
>> So AFAICT that comment was true as of
>>
>>   220069945b29 ("clocksource: arch_timer: Add support for memory mapped timers")
>>
>> but not after a commit that came 2 months later:
>>
>>   65cd4f6c99c1 ("arch_timer: Move to generic sched_clock framework")
>>
>> which IIUC made arm/arm64 follow the default approach of using the
>> jiffy-based sched_clock() before probing DT/ACPI and registering a "proper"
>> sched_clock.
>>
>> All of that to say: the comment about arch_timer_read_counter() vs early
>> sched_clock() doesn't apply anymore, but I think we need to keep its
>> initalization around for stuff like get_cycles(). This initialization here
>> should be OK to put to the bin, though.
>
> Something like the below folded in then?
>

Much better, thank you!
[tip: sched/core] arm64/arch_timer: Provide noinstr sched_clock_read() functions
Posted by tip-bot2 for Peter Zijlstra 2 years, 8 months ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     24ee7607b286b44a5112ced38652df14cd80d5e2
Gitweb:        https://git.kernel.org/tip/24ee7607b286b44a5112ced38652df14cd80d5e2
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Fri, 19 May 2023 12:21:02 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 05 Jun 2023 21:11:05 +02:00

arm64/arch_timer: Provide noinstr sched_clock_read() functions

With the intent to provide local_clock_noinstr(), a variant of
local_clock() that's safe to be called from noinstr code (with the
assumption that any such code will already be non-preemptible),
prepare for things by providing a noinstr sched_clock_read() function.

Specifically, preempt_enable_*() calls out to schedule(), which upsets
noinstr validation efforts.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Michael Kelley <mikelley@microsoft.com>  # Hyper-V
Link: https://lore.kernel.org/r/20230519102715.435618812@infradead.org
---
 arch/arm64/include/asm/arch_timer.h  |  8 +----
 drivers/clocksource/arm_arch_timer.c | 54 ++++++++++++++++++++-------
 2 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index af1fafb..934c658 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -88,13 +88,7 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void)
 
 #define arch_timer_reg_read_stable(reg)					\
 	({								\
-		u64 _val;						\
-									\
-		preempt_disable_notrace();				\
-		_val = erratum_handler(read_ ## reg)();			\
-		preempt_enable_notrace();				\
-									\
-		_val;							\
+		erratum_handler(read_ ## reg)();			\
 	})
 
 /*
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index e09d442..b23d23b 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -191,22 +191,40 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg reg,
 	return val;
 }
 
-static notrace u64 arch_counter_get_cntpct_stable(void)
+static noinstr u64 raw_counter_get_cntpct_stable(void)
 {
 	return __arch_counter_get_cntpct_stable();
 }
 
-static notrace u64 arch_counter_get_cntpct(void)
+static notrace u64 arch_counter_get_cntpct_stable(void)
+{
+	u64 val;
+	preempt_disable_notrace();
+	val = __arch_counter_get_cntpct_stable();
+	preempt_enable_notrace();
+	return val;
+}
+
+static noinstr u64 arch_counter_get_cntpct(void)
 {
 	return __arch_counter_get_cntpct();
 }
 
-static notrace u64 arch_counter_get_cntvct_stable(void)
+static noinstr u64 raw_counter_get_cntvct_stable(void)
 {
 	return __arch_counter_get_cntvct_stable();
 }
 
-static notrace u64 arch_counter_get_cntvct(void)
+static notrace u64 arch_counter_get_cntvct_stable(void)
+{
+	u64 val;
+	preempt_disable_notrace();
+	val = __arch_counter_get_cntvct_stable();
+	preempt_enable_notrace();
+	return val;
+}
+
+static noinstr u64 arch_counter_get_cntvct(void)
 {
 	return __arch_counter_get_cntvct();
 }
@@ -753,14 +771,14 @@ static int arch_timer_set_next_event_phys(unsigned long evt,
 	return 0;
 }
 
-static u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
+static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
 {
 	u32 cnt_lo, cnt_hi, tmp_hi;
 
 	do {
-		cnt_hi = readl_relaxed(t->base + offset_lo + 4);
-		cnt_lo = readl_relaxed(t->base + offset_lo);
-		tmp_hi = readl_relaxed(t->base + offset_lo + 4);
+		cnt_hi = __raw_readl(t->base + offset_lo + 4);
+		cnt_lo = __raw_readl(t->base + offset_lo);
+		tmp_hi = __raw_readl(t->base + offset_lo + 4);
 	} while (cnt_hi != tmp_hi);
 
 	return ((u64) cnt_hi << 32) | cnt_lo;
@@ -1060,7 +1078,7 @@ bool arch_timer_evtstrm_available(void)
 	return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
 }
 
-static u64 arch_counter_get_cntvct_mem(void)
+static noinstr u64 arch_counter_get_cntvct_mem(void)
 {
 	return arch_counter_get_cnt_mem(arch_timer_mem, CNTVCT_LO);
 }
@@ -1074,6 +1092,7 @@ struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
 
 static void __init arch_counter_register(unsigned type)
 {
+	u64 (*scr)(void);
 	u64 start_count;
 	int width;
 
@@ -1083,21 +1102,28 @@ static void __init arch_counter_register(unsigned type)
 
 		if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
 		    arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
-			if (arch_timer_counter_has_wa())
+			if (arch_timer_counter_has_wa()) {
 				rd = arch_counter_get_cntvct_stable;
-			else
+				scr = raw_counter_get_cntvct_stable;
+			} else {
 				rd = arch_counter_get_cntvct;
+				scr = arch_counter_get_cntvct;
+			}
 		} else {
-			if (arch_timer_counter_has_wa())
+			if (arch_timer_counter_has_wa()) {
 				rd = arch_counter_get_cntpct_stable;
-			else
+				scr = raw_counter_get_cntpct_stable;
+			} else {
 				rd = arch_counter_get_cntpct;
+				scr = arch_counter_get_cntpct;
+			}
 		}
 
 		arch_timer_read_counter = rd;
 		clocksource_counter.vdso_clock_mode = vdso_default;
 	} else {
 		arch_timer_read_counter = arch_counter_get_cntvct_mem;
+		scr = arch_counter_get_cntvct_mem;
 	}
 
 	width = arch_counter_get_width();
@@ -1113,7 +1139,7 @@ static void __init arch_counter_register(unsigned type)
 	timecounter_init(&arch_timer_kvm_info.timecounter,
 			 &cyclecounter, start_count);
 
-	sched_clock_register(arch_timer_read_counter, width, arch_timer_rate);
+	sched_clock_register(scr, width, arch_timer_rate);
 }
 
 static void arch_timer_stop(struct clock_event_device *clk)
Re: [tip: sched/core] arm64/arch_timer: Provide noinstr sched_clock_read() functions
Posted by Peter Zijlstra 2 years, 8 months ago
On Mon, Jun 05, 2023 at 07:16:18PM -0000, tip-bot2 for Peter Zijlstra wrote:
> @@ -753,14 +771,14 @@ static int arch_timer_set_next_event_phys(unsigned long evt,
>  	return 0;
>  }
>  
> -static u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
> +static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
>  {
>  	u32 cnt_lo, cnt_hi, tmp_hi;
>  
>  	do {
> -		cnt_hi = readl_relaxed(t->base + offset_lo + 4);
> -		cnt_lo = readl_relaxed(t->base + offset_lo);
> -		tmp_hi = readl_relaxed(t->base + offset_lo + 4);
> +		cnt_hi = __raw_readl(t->base + offset_lo + 4);
> +		cnt_lo = __raw_readl(t->base + offset_lo);
> +		tmp_hi = __raw_readl(t->base + offset_lo + 4);
>  	} while (cnt_hi != tmp_hi);
>  
>  	return ((u64) cnt_hi << 32) | cnt_lo;

Mark noted that this looses the byteswap :/


---
Subject: arm64/arch_timer: Fix MMIO byteswap

The readl_relaxed() to __raw_readl() change meant to loose the
instrumentation, but also (inadvertently) lost the byteswap.

Fixes: 24ee7607b286 ("arm64/arch_timer: Provide noinstr sched_clock_read() functions")
Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index b23d23b033cc..e733a2a1927a 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -776,9 +776,9 @@ static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
 	u32 cnt_lo, cnt_hi, tmp_hi;
 
 	do {
-		cnt_hi = __raw_readl(t->base + offset_lo + 4);
-		cnt_lo = __raw_readl(t->base + offset_lo);
-		tmp_hi = __raw_readl(t->base + offset_lo + 4);
+		cnt_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
+		cnt_lo = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo));
+		tmp_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
 	} while (cnt_hi != tmp_hi);
 
 	return ((u64) cnt_hi << 32) | cnt_lo;
Re: [tip: sched/core] arm64/arch_timer: Provide noinstr sched_clock_read() functions
Posted by Mark Rutland 2 years, 8 months ago
On Tue, Jun 06, 2023 at 10:06:14AM +0200, Peter Zijlstra wrote:
> On Mon, Jun 05, 2023 at 07:16:18PM -0000, tip-bot2 for Peter Zijlstra wrote:
> > @@ -753,14 +771,14 @@ static int arch_timer_set_next_event_phys(unsigned long evt,
> >  	return 0;
> >  }
> >  
> > -static u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
> > +static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
> >  {
> >  	u32 cnt_lo, cnt_hi, tmp_hi;
> >  
> >  	do {
> > -		cnt_hi = readl_relaxed(t->base + offset_lo + 4);
> > -		cnt_lo = readl_relaxed(t->base + offset_lo);
> > -		tmp_hi = readl_relaxed(t->base + offset_lo + 4);
> > +		cnt_hi = __raw_readl(t->base + offset_lo + 4);
> > +		cnt_lo = __raw_readl(t->base + offset_lo);
> > +		tmp_hi = __raw_readl(t->base + offset_lo + 4);
> >  	} while (cnt_hi != tmp_hi);
> >  
> >  	return ((u64) cnt_hi << 32) | cnt_lo;
> 
> Mark noted that this looses the byteswap :/
> 
> 
> ---
> Subject: arm64/arch_timer: Fix MMIO byteswap
> 
> The readl_relaxed() to __raw_readl() change meant to loose the
> instrumentation, but also (inadvertently) lost the byteswap.
> 
> Fixes: 24ee7607b286 ("arm64/arch_timer: Provide noinstr sched_clock_read() functions")
> Reported-by: Mark Rutland <mark.rutland@arm.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index b23d23b033cc..e733a2a1927a 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -776,9 +776,9 @@ static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
>  	u32 cnt_lo, cnt_hi, tmp_hi;
>  
>  	do {
> -		cnt_hi = __raw_readl(t->base + offset_lo + 4);
> -		cnt_lo = __raw_readl(t->base + offset_lo);
> -		tmp_hi = __raw_readl(t->base + offset_lo + 4);
> +		cnt_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
> +		cnt_lo = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo));
> +		tmp_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
>  	} while (cnt_hi != tmp_hi);

This LGTM, so FWIW:

Acked-by: Mark Rutland <mark.rutland@arm.com>

Longer term it'd be good to have noistr-safe forms of the MMIO accessors, but
from a quick play that's a much bigger piece of work, and I think this is fine
for now!

Thanks,
Mark.
[tip: sched/core] arm64/arch_timer: Fix MMIO byteswap
Posted by tip-bot2 for Peter Zijlstra 2 years, 8 months ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     5416bf1cf5602ab3a38b4c0d15ccec1ca4199633
Gitweb:        https://git.kernel.org/tip/5416bf1cf5602ab3a38b4c0d15ccec1ca4199633
Author:        Peter Zijlstra <peterz@infradead.org>
AuthorDate:    Tue, 06 Jun 2023 10:06:14 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 06 Jun 2023 10:19:51 +02:00

arm64/arch_timer: Fix MMIO byteswap

The readl_relaxed() to __raw_readl() change meant to loose the
instrumentation, but also (inadvertently) lost the byteswap.

Fixes: 24ee7607b286 ("arm64/arch_timer: Provide noinstr sched_clock_read() functions")
Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lkml.kernel.org/r/20230606080614.GB905437@hirez.programming.kicks-ass.net
---
 drivers/clocksource/arm_arch_timer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index b23d23b..e733a2a 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -776,9 +776,9 @@ static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
 	u32 cnt_lo, cnt_hi, tmp_hi;
 
 	do {
-		cnt_hi = __raw_readl(t->base + offset_lo + 4);
-		cnt_lo = __raw_readl(t->base + offset_lo);
-		tmp_hi = __raw_readl(t->base + offset_lo + 4);
+		cnt_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
+		cnt_lo = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo));
+		tmp_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
 	} while (cnt_hi != tmp_hi);
 
 	return ((u64) cnt_hi << 32) | cnt_lo;