From: Thomas Gleixner <tglx@linutronix.de>
From: Thomas Gleixner <tglx@linutronix.de>
struct timekeeper is ordered suboptimally vs. cachelines. The layout,
including the preceding seqcount (see struct tk_core in timekeeper.c) is:
cacheline 0: seqcount, tkr_mono
cacheline 1: tkr_raw, xtime_sec
cacheline 2: ktime_sec ... tai_offset, internal variables
cacheline 3: next_leap_ktime, raw_sec, internal variables
cacheline 4: internal variables
So any access via ktime_get*() except for access to CLOCK_MONOTONIC_RAW
will use either cachelines 0 + 1 or cachelines 0 + 2. Access to
CLOCK_MONOTONIC_RAW uses cachelines 0 + 1 + 3.
Reorder the members so that the result is more efficient:
cacheline 0: seqcount, tkr_mono
cacheline 1: xtime_sec, ktime_sec ... tai_offset
cacheline 2: tkr_raw, raw_sec
cacheline 3: internal variables
cacheline 4: internal variables
That means ktime_get*() will access cacheline 0 + 1 and CLOCK_MONOTONIC_RAW
access will use cachelines 0 + 2.
Update kernel-doc and fix formatting issues while at it. Also fix a typo
in struct tk_read_base kernel-doc.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
---
include/linux/timekeeper_internal.h | 102 +++++++++++++++++++++---------------
1 file changed, 61 insertions(+), 41 deletions(-)
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 902c20ef495a..430e40549136 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -26,7 +26,7 @@
* occupies a single 64byte cache line.
*
* The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessors.
+ * for the fast NMI safe accessors.
*
* @base_real is for the fast NMI safe accessor to allow reading clock
* realtime from any context.
@@ -44,33 +44,41 @@ struct tk_read_base {
/**
* struct timekeeper - Structure holding internal timekeeping values.
- * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
- * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
- * @xtime_sec: Current CLOCK_REALTIME time in seconds
- * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
- * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
- * @offs_real: Offset clock monotonic -> clock realtime
- * @offs_boot: Offset clock monotonic -> clock boottime
- * @offs_tai: Offset clock monotonic -> clock tai
- * @tai_offset: The current UTC to TAI offset in seconds
- * @clock_was_set_seq: The sequence number of clock was set events
- * @cs_was_changed_seq: The sequence number of clocksource change events
- * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
- * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
- * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
- * @cycle_interval: Number of clock cycles in one NTP interval
- * @xtime_interval: Number of clock shifted nano seconds in one NTP
- * interval.
- * @xtime_remainder: Shifted nano seconds left over when rounding
- * @cycle_interval
- * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
- * @ntp_error: Difference between accumulated time and NTP time in ntp
- * shifted nano seconds.
- * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
- * ntp shifted nano seconds.
- * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
- * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
- * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
+ * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
+ * @xtime_sec: Current CLOCK_REALTIME time in seconds
+ * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
+ * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
+ * @offs_real: Offset clock monotonic -> clock realtime
+ * @offs_boot: Offset clock monotonic -> clock boottime
+ * @offs_tai: Offset clock monotonic -> clock tai
+ * @tai_offset: The current UTC to TAI offset in seconds
+ * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
+ * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
+ * @clock_was_set_seq: The sequence number of clock was set events
+ * @cs_was_changed_seq: The sequence number of clocksource change events
+ * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
+ * @cycle_interval: Number of clock cycles in one NTP interval
+ * @xtime_interval: Number of clock shifted nano seconds in one NTP
+ * interval.
+ * @xtime_remainder: Shifted nano seconds left over when rounding
+ * @cycle_interval
+ * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
+ * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
+ * @ntp_tick: The ntp_tick_length() value currently being
+ * used. This cached copy ensures we consistently
+ * apply the tick length for an entire tick, as
+ * ntp_tick_length may change mid-tick, and we don't
+ * want to apply that new value to the tick in
+ * progress.
+ * @ntp_error: Difference between accumulated time and NTP time in ntp
+ * shifted nano seconds.
+ * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
+ * ntp shifted nano seconds.
+ * @ntp_err_mult: Multiplication factor for scaled math conversion
+ * @skip_second_overflow: Flag used to avoid updating NTP twice with same second
+ * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
+ * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
+ * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
*
* Note: For timespec(64) based interfaces wall_to_monotonic is what
* we need to add to xtime (or xtime corrected for sub jiffy times)
@@ -88,10 +96,25 @@ struct tk_read_base {
*
* @monotonic_to_boottime is a timespec64 representation of @offs_boot to
* accelerate the VDSO update for CLOCK_BOOTTIME.
+ *
+ * The cacheline ordering of the structure is optimized for in kernel usage
+ * of the ktime_get() and ktime_get_ts64() family of time accessors. Struct
+ * timekeeper is prepended in the core timekeeping code with a sequence
+ * count, which results in the following cacheline layout:
+ *
+ * 0: seqcount, tkr_mono
+ * 1: xtime_sec ... tai_offset
+ * 2: tkr_raw, raw_sec
+ * 3,4: Internal variables
+ *
+ * Cacheline 0,1 contain the data which is used for accessing
+ * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the
+ * data for accessing CLOCK_MONOTONIC_RAW. Cacheline 3,4 are internal
+ * variables which are only accessed during timekeeper updates once per
+ * tick.
*/
struct timekeeper {
struct tk_read_base tkr_mono;
- struct tk_read_base tkr_raw;
u64 xtime_sec;
unsigned long ktime_sec;
struct timespec64 wall_to_monotonic;
@@ -99,31 +122,28 @@ struct timekeeper {
ktime_t offs_boot;
ktime_t offs_tai;
s32 tai_offset;
+
+ struct tk_read_base tkr_raw;
+ u64 raw_sec;
+
+ /* The following members are for timekeeping internal use */
unsigned int clock_was_set_seq;
u8 cs_was_changed_seq;
- ktime_t next_leap_ktime;
- u64 raw_sec;
+
struct timespec64 monotonic_to_boot;
- /* The following members are for timekeeping internal use */
u64 cycle_interval;
u64 xtime_interval;
s64 xtime_remainder;
u64 raw_interval;
- /* The ntp_tick_length() value currently being used.
- * This cached copy ensures we consistently apply the tick
- * length for an entire tick, as ntp_tick_length may change
- * mid-tick, and we don't want to apply that new value to
- * the tick in progress.
- */
+
+ ktime_t next_leap_ktime;
u64 ntp_tick;
- /* Difference between accumulated time and NTP time in ntp
- * shifted nano seconds. */
s64 ntp_error;
u32 ntp_error_shift;
u32 ntp_err_mult;
- /* Flag used to avoid updating NTP twice with same second */
u32 skip_second_overflow;
+
#ifdef CONFIG_DEBUG_TIMEKEEPING
long last_warning;
/*
--
2.39.5
On Wed, Oct 9, 2024 at 1:29 AM Anna-Maria Behnsen <anna-maria@linutronix.de> wrote: > > From: Thomas Gleixner <tglx@linutronix.de> > > From: Thomas Gleixner <tglx@linutronix.de> > > struct timekeeper is ordered suboptimal vs. cachelines. The layout, > including the preceding seqcount (see struct tk_core in timekeeper.c) is: > > cacheline 0: seqcount, tkr_mono > cacheline 1: tkr_raw, xtime_sec > cacheline 2: ktime_sec ... tai_offset, internal variables > cacheline 3: next_leap_ktime, raw_sec, internal variables > cacheline 4: internal variables > > So any access to via ktime_get*() except for access to CLOCK_MONOTONIC_RAW > will use either cachelines 0 + 1 or cachelines 0 + 2. Access to > CLOCK_MONOTONIC_RAW uses cachelines 0 + 1 + 3. > > Reorder the members so that the result is more efficient: > > cacheline 0: seqcount, tkr_mono > cacheline 1: xtime_sec, ktime_sec ... tai_offset > cacheline 2: tkr_raw, raw_sec > cacheline 3: internal variables > cacheline 4: internal variables > > That means ktime_get*() will access cacheline 0 + 1 and CLOCK_MONOTONIC_RAW > access will use cachelines 0 + 2. > > Update kernel-doc and fix formatting issues while at it. Also fix a typo > in struct tk_read_base kernel-doc. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de> Acked-by: John Stultz <jstultz@google.com> > --- > include/linux/timekeeper_internal.h | 102 +++++++++++++++++++++--------------- > 1 file changed, 61 insertions(+), 41 deletions(-) > > diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h > index 902c20ef495a..430e40549136 100644 > --- a/include/linux/timekeeper_internal.h > +++ b/include/linux/timekeeper_internal.h > @@ -26,7 +26,7 @@ > * occupies a single 64byte cache line. > * > * The struct is separate from struct timekeeper as it is also used > - * for a fast NMI safe accessors. > + * for the fast NMI safe accessors. 
> * > * @base_real is for the fast NMI safe accessor to allow reading clock > * realtime from any context. > @@ -44,33 +44,41 @@ struct tk_read_base { > > /** > * struct timekeeper - Structure holding internal timekeeping values. > - * @tkr_mono: The readout base structure for CLOCK_MONOTONIC > - * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW > - * @xtime_sec: Current CLOCK_REALTIME time in seconds > - * @ktime_sec: Current CLOCK_MONOTONIC time in seconds > - * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset > - * @offs_real: Offset clock monotonic -> clock realtime > - * @offs_boot: Offset clock monotonic -> clock boottime > - * @offs_tai: Offset clock monotonic -> clock tai > - * @tai_offset: The current UTC to TAI offset in seconds > - * @clock_was_set_seq: The sequence number of clock was set events > - * @cs_was_changed_seq: The sequence number of clocksource change events > - * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second > - * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds > - * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset > - * @cycle_interval: Number of clock cycles in one NTP interval > - * @xtime_interval: Number of clock shifted nano seconds in one NTP > - * interval. > - * @xtime_remainder: Shifted nano seconds left over when rounding > - * @cycle_interval > - * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. > - * @ntp_error: Difference between accumulated time and NTP time in ntp > - * shifted nano seconds. > - * @ntp_error_shift: Shift conversion between clock shifted nano seconds and > - * ntp shifted nano seconds. 
> - * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) > - * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) > - * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) > + * @tkr_mono: The readout base structure for CLOCK_MONOTONIC > + * @xtime_sec: Current CLOCK_REALTIME time in seconds > + * @ktime_sec: Current CLOCK_MONOTONIC time in seconds > + * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset > + * @offs_real: Offset clock monotonic -> clock realtime > + * @offs_boot: Offset clock monotonic -> clock boottime > + * @offs_tai: Offset clock monotonic -> clock tai > + * @tai_offset: The current UTC to TAI offset in seconds > + * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW > + * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds > + * @clock_was_set_seq: The sequence number of clock was set events > + * @cs_was_changed_seq: The sequence number of clocksource change events > + * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset > + * @cycle_interval: Number of clock cycles in one NTP interval > + * @xtime_interval: Number of clock shifted nano seconds in one NTP > + * interval. > + * @xtime_remainder: Shifted nano seconds left over when rounding > + * @cycle_interval > + * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. > + * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second > + * @ntp_tick: The ntp_tick_length() value currently being > + * used. This cached copy ensures we consistently > + * apply the tick length for an entire tick, as > + * ntp_tick_length may change mid-tick, and we don't > + * want to apply that new value to the tick in > + * progress. > + * @ntp_error: Difference between accumulated time and NTP time in ntp > + * shifted nano seconds. > + * @ntp_error_shift: Shift conversion between clock shifted nano seconds and > + * ntp shifted nano seconds. 
> + * @ntp_err_mult: Multiplication factor for scaled math conversion > + * @skip_second_overflow: Flag used to avoid updating NTP twice with same second > + * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) > + * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) > + * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) > * > * Note: For timespec(64) based interfaces wall_to_monotonic is what > * we need to add to xtime (or xtime corrected for sub jiffy times) > @@ -88,10 +96,25 @@ struct tk_read_base { > * > * @monotonic_to_boottime is a timespec64 representation of @offs_boot to > * accelerate the VDSO update for CLOCK_BOOTTIME. > + * > + * The cacheline ordering of the structure is optimized for in kernel usage > + * of the ktime_get() and ktime_get_ts64() family of time accessors. Struct > + * timekeeper is prepended in the core timekeeeping code with a sequence > + * count, which results in the following cacheline layout: > + * > + * 0: seqcount, tkr_mono > + * 1: xtime_sec ... tai_offset > + * 2: tkr_raw, raw_sec > + * 3,4: Internal variables > + * > + * Cacheline 0,1 contain the data which is used for accessing > + * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the > + * data for accessing CLOCK_MONOTONIC_RAW. Cacheline 3,4 are internal > + * variables which are only accessed during timekeeper updates once per > + * tick. Would it make sense to add divider comments or something in the struct to make this more visible? I fret in the context of a patch, a + line adding a new structure element that breaks the ordered alignment might not be obvious. thanks -john
John Stultz <jstultz@google.com> writes: > On Wed, Oct 9, 2024 at 1:29 AM Anna-Maria Behnsen > <anna-maria@linutronix.de> wrote: >> >> From: Thomas Gleixner <tglx@linutronix.de> >> >> From: Thomas Gleixner <tglx@linutronix.de> >> >> struct timekeeper is ordered suboptimal vs. cachelines. The layout, >> including the preceding seqcount (see struct tk_core in timekeeper.c) is: >> >> cacheline 0: seqcount, tkr_mono >> cacheline 1: tkr_raw, xtime_sec >> cacheline 2: ktime_sec ... tai_offset, internal variables >> cacheline 3: next_leap_ktime, raw_sec, internal variables >> cacheline 4: internal variables >> >> So any access to via ktime_get*() except for access to CLOCK_MONOTONIC_RAW >> will use either cachelines 0 + 1 or cachelines 0 + 2. Access to >> CLOCK_MONOTONIC_RAW uses cachelines 0 + 1 + 3. >> >> Reorder the members so that the result is more efficient: >> >> cacheline 0: seqcount, tkr_mono >> cacheline 1: xtime_sec, ktime_sec ... tai_offset >> cacheline 2: tkr_raw, raw_sec >> cacheline 3: internal variables >> cacheline 4: internal variables >> >> That means ktime_get*() will access cacheline 0 + 1 and CLOCK_MONOTONIC_RAW >> access will use cachelines 0 + 2. >> >> Update kernel-doc and fix formatting issues while at it. Also fix a typo >> in struct tk_read_base kernel-doc. >> >> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> >> Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de> > > Acked-by: John Stultz <jstultz@google.com> > >> --- >> include/linux/timekeeper_internal.h | 102 +++++++++++++++++++++--------------- >> 1 file changed, 61 insertions(+), 41 deletions(-) >> >> diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h >> index 902c20ef495a..430e40549136 100644 >> --- a/include/linux/timekeeper_internal.h >> +++ b/include/linux/timekeeper_internal.h >> @@ -26,7 +26,7 @@ >> * occupies a single 64byte cache line. 
>> * >> * The struct is separate from struct timekeeper as it is also used >> - * for a fast NMI safe accessors. >> + * for the fast NMI safe accessors. >> * >> * @base_real is for the fast NMI safe accessor to allow reading clock >> * realtime from any context. >> @@ -44,33 +44,41 @@ struct tk_read_base { >> >> /** >> * struct timekeeper - Structure holding internal timekeeping values. >> - * @tkr_mono: The readout base structure for CLOCK_MONOTONIC >> - * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW >> - * @xtime_sec: Current CLOCK_REALTIME time in seconds >> - * @ktime_sec: Current CLOCK_MONOTONIC time in seconds >> - * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset >> - * @offs_real: Offset clock monotonic -> clock realtime >> - * @offs_boot: Offset clock monotonic -> clock boottime >> - * @offs_tai: Offset clock monotonic -> clock tai >> - * @tai_offset: The current UTC to TAI offset in seconds >> - * @clock_was_set_seq: The sequence number of clock was set events >> - * @cs_was_changed_seq: The sequence number of clocksource change events >> - * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second >> - * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds >> - * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset >> - * @cycle_interval: Number of clock cycles in one NTP interval >> - * @xtime_interval: Number of clock shifted nano seconds in one NTP >> - * interval. >> - * @xtime_remainder: Shifted nano seconds left over when rounding >> - * @cycle_interval >> - * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. >> - * @ntp_error: Difference between accumulated time and NTP time in ntp >> - * shifted nano seconds. >> - * @ntp_error_shift: Shift conversion between clock shifted nano seconds and >> - * ntp shifted nano seconds. 
>> - * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) >> - * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) >> - * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) >> + * @tkr_mono: The readout base structure for CLOCK_MONOTONIC >> + * @xtime_sec: Current CLOCK_REALTIME time in seconds >> + * @ktime_sec: Current CLOCK_MONOTONIC time in seconds >> + * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset >> + * @offs_real: Offset clock monotonic -> clock realtime >> + * @offs_boot: Offset clock monotonic -> clock boottime >> + * @offs_tai: Offset clock monotonic -> clock tai >> + * @tai_offset: The current UTC to TAI offset in seconds >> + * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW >> + * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds >> + * @clock_was_set_seq: The sequence number of clock was set events >> + * @cs_was_changed_seq: The sequence number of clocksource change events >> + * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset >> + * @cycle_interval: Number of clock cycles in one NTP interval >> + * @xtime_interval: Number of clock shifted nano seconds in one NTP >> + * interval. >> + * @xtime_remainder: Shifted nano seconds left over when rounding >> + * @cycle_interval >> + * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. >> + * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second >> + * @ntp_tick: The ntp_tick_length() value currently being >> + * used. This cached copy ensures we consistently >> + * apply the tick length for an entire tick, as >> + * ntp_tick_length may change mid-tick, and we don't >> + * want to apply that new value to the tick in >> + * progress. >> + * @ntp_error: Difference between accumulated time and NTP time in ntp >> + * shifted nano seconds. >> + * @ntp_error_shift: Shift conversion between clock shifted nano seconds and >> + * ntp shifted nano seconds. 
>> + * @ntp_err_mult: Multiplication factor for scaled math conversion >> + * @skip_second_overflow: Flag used to avoid updating NTP twice with same second >> + * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) >> + * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) >> + * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) >> * >> * Note: For timespec(64) based interfaces wall_to_monotonic is what >> * we need to add to xtime (or xtime corrected for sub jiffy times) >> @@ -88,10 +96,25 @@ struct tk_read_base { >> * >> * @monotonic_to_boottime is a timespec64 representation of @offs_boot to >> * accelerate the VDSO update for CLOCK_BOOTTIME. >> + * >> + * The cacheline ordering of the structure is optimized for in kernel usage >> + * of the ktime_get() and ktime_get_ts64() family of time accessors. Struct >> + * timekeeper is prepended in the core timekeeeping code with a sequence >> + * count, which results in the following cacheline layout: >> + * >> + * 0: seqcount, tkr_mono >> + * 1: xtime_sec ... tai_offset >> + * 2: tkr_raw, raw_sec >> + * 3,4: Internal variables >> + * >> + * Cacheline 0,1 contain the data which is used for accessing >> + * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the >> + * data for accessing CLOCK_MONOTONIC_RAW. Cacheline 3,4 are internal >> + * variables which are only accessed during timekeeper updates once per >> + * tick. > > Would it make sense to add divider comments or something in the struct > to make this more visible? I fret in the context of a patch, a + line > adding a new structure element that breaks the ordered alignment might > not be obvious. This is an argument! I'll add simple comments with /* Cachline X: */ Thanks, Anna-Maria
From: Thomas Gleixner <tglx@linutronix.de>
struct timekeeper is ordered suboptimally vs. cachelines. The layout,
including the preceding seqcount (see struct tk_core in timekeeper.c) is:
cacheline 0: seqcount, tkr_mono
cacheline 1: tkr_raw, xtime_sec
cacheline 2: ktime_sec ... tai_offset, internal variables
cacheline 3: next_leap_ktime, raw_sec, internal variables
cacheline 4: internal variables
So any access via ktime_get*() except for access to CLOCK_MONOTONIC_RAW
will use either cachelines 0 + 1 or cachelines 0 + 2. Access to
CLOCK_MONOTONIC_RAW uses cachelines 0 + 1 + 3.
Reorder the members so that the result is more efficient:
cacheline 0: seqcount, tkr_mono
cacheline 1: xtime_sec, ktime_sec ... tai_offset
cacheline 2: tkr_raw, raw_sec
cacheline 3: internal variables
cacheline 4: internal variables
That means ktime_get*() will access cacheline 0 + 1 and CLOCK_MONOTONIC_RAW
access will use cachelines 0 + 2.
Update kernel-doc and fix formatting issues while at it. Also fix a typo
in struct tk_read_base kernel-doc.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Acked-by: John Stultz <jstultz@google.com>
---
include/linux/timekeeper_internal.h | 106 +++++++++++++++++-----------
1 file changed, 65 insertions(+), 41 deletions(-)
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 902c20ef495a..a3b6380a7777 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -26,7 +26,7 @@
* occupies a single 64byte cache line.
*
* The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessors.
+ * for the fast NMI safe accessors.
*
* @base_real is for the fast NMI safe accessor to allow reading clock
* realtime from any context.
@@ -44,33 +44,41 @@ struct tk_read_base {
/**
* struct timekeeper - Structure holding internal timekeeping values.
- * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
- * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
- * @xtime_sec: Current CLOCK_REALTIME time in seconds
- * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
- * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
- * @offs_real: Offset clock monotonic -> clock realtime
- * @offs_boot: Offset clock monotonic -> clock boottime
- * @offs_tai: Offset clock monotonic -> clock tai
- * @tai_offset: The current UTC to TAI offset in seconds
- * @clock_was_set_seq: The sequence number of clock was set events
- * @cs_was_changed_seq: The sequence number of clocksource change events
- * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
- * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
- * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
- * @cycle_interval: Number of clock cycles in one NTP interval
- * @xtime_interval: Number of clock shifted nano seconds in one NTP
- * interval.
- * @xtime_remainder: Shifted nano seconds left over when rounding
- * @cycle_interval
- * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
- * @ntp_error: Difference between accumulated time and NTP time in ntp
- * shifted nano seconds.
- * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
- * ntp shifted nano seconds.
- * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
- * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
- * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
+ * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
+ * @xtime_sec: Current CLOCK_REALTIME time in seconds
+ * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
+ * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
+ * @offs_real: Offset clock monotonic -> clock realtime
+ * @offs_boot: Offset clock monotonic -> clock boottime
+ * @offs_tai: Offset clock monotonic -> clock tai
+ * @tai_offset: The current UTC to TAI offset in seconds
+ * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
+ * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
+ * @clock_was_set_seq: The sequence number of clock was set events
+ * @cs_was_changed_seq: The sequence number of clocksource change events
+ * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
+ * @cycle_interval: Number of clock cycles in one NTP interval
+ * @xtime_interval: Number of clock shifted nano seconds in one NTP
+ * interval.
+ * @xtime_remainder: Shifted nano seconds left over when rounding
+ * @cycle_interval
+ * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
+ * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
+ * @ntp_tick: The ntp_tick_length() value currently being
+ * used. This cached copy ensures we consistently
+ * apply the tick length for an entire tick, as
+ * ntp_tick_length may change mid-tick, and we don't
+ * want to apply that new value to the tick in
+ * progress.
+ * @ntp_error: Difference between accumulated time and NTP time in ntp
+ * shifted nano seconds.
+ * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
+ * ntp shifted nano seconds.
+ * @ntp_err_mult: Multiplication factor for scaled math conversion
+ * @skip_second_overflow: Flag used to avoid updating NTP twice with same second
+ * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
+ * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
+ * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
*
* Note: For timespec(64) based interfaces wall_to_monotonic is what
* we need to add to xtime (or xtime corrected for sub jiffy times)
@@ -88,10 +96,28 @@ struct tk_read_base {
*
* @monotonic_to_boottime is a timespec64 representation of @offs_boot to
* accelerate the VDSO update for CLOCK_BOOTTIME.
+ *
+ * The cacheline ordering of the structure is optimized for in kernel usage of
+ * the ktime_get() and ktime_get_ts64() family of time accessors. Struct
+ * timekeeper is prepended in the core timekeeping code with a sequence count,
+ * which results in the following cacheline layout:
+ *
+ * 0: seqcount, tkr_mono
+ * 1: xtime_sec ... tai_offset
+ * 2: tkr_raw, raw_sec
+ * 3,4: Internal variables
+ *
+ * Cacheline 0,1 contain the data which is used for accessing
+ * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the
+ * data for accessing CLOCK_MONOTONIC_RAW. Cacheline 3,4 are internal
+ * variables which are only accessed during timekeeper updates once per
+ * tick.
*/
struct timekeeper {
+ /* Cacheline 0 (together with prepended seqcount of timekeeper core): */
struct tk_read_base tkr_mono;
- struct tk_read_base tkr_raw;
+
+ /* Cacheline 1: */
u64 xtime_sec;
unsigned long ktime_sec;
struct timespec64 wall_to_monotonic;
@@ -99,31 +125,29 @@ struct timekeeper {
ktime_t offs_boot;
ktime_t offs_tai;
s32 tai_offset;
+
+ /* Cacheline 2: */
+ struct tk_read_base tkr_raw;
+ u64 raw_sec;
+
+ /* Cacheline 3 and 4 (timekeeping internal variables): */
unsigned int clock_was_set_seq;
u8 cs_was_changed_seq;
- ktime_t next_leap_ktime;
- u64 raw_sec;
+
struct timespec64 monotonic_to_boot;
- /* The following members are for timekeeping internal use */
u64 cycle_interval;
u64 xtime_interval;
s64 xtime_remainder;
u64 raw_interval;
- /* The ntp_tick_length() value currently being used.
- * This cached copy ensures we consistently apply the tick
- * length for an entire tick, as ntp_tick_length may change
- * mid-tick, and we don't want to apply that new value to
- * the tick in progress.
- */
+
+ ktime_t next_leap_ktime;
u64 ntp_tick;
- /* Difference between accumulated time and NTP time in ntp
- * shifted nano seconds. */
s64 ntp_error;
u32 ntp_error_shift;
u32 ntp_err_mult;
- /* Flag used to avoid updating NTP twice with same second */
u32 skip_second_overflow;
+
#ifdef CONFIG_DEBUG_TIMEKEEPING
long last_warning;
/*
--
2.39.5
The following commit has been merged into the timers/core branch of tip:
Commit-ID: 6860d28ccb2390b4eeda32ab2ce7eb10f71921e1
Gitweb: https://git.kernel.org/tip/6860d28ccb2390b4eeda32ab2ce7eb10f71921e1
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Tue, 15 Oct 2024 12:08:39 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Fri, 25 Oct 2024 19:49:13 +02:00
timekeeping: Reorder struct timekeeper
struct timekeeper is ordered suboptimally vs. cachelines. The layout,
including the preceding seqcount (see struct tk_core in timekeeper.c) is:
cacheline 0: seqcount, tkr_mono
cacheline 1: tkr_raw, xtime_sec
cacheline 2: ktime_sec ... tai_offset, internal variables
cacheline 3: next_leap_ktime, raw_sec, internal variables
cacheline 4: internal variables
So any access via ktime_get*() except for access to CLOCK_MONOTONIC_RAW
will use either cachelines 0 + 1 or cachelines 0 + 2. Access to
CLOCK_MONOTONIC_RAW uses cachelines 0 + 1 + 3.
Reorder the members so that the result is more efficient:
cacheline 0: seqcount, tkr_mono
cacheline 1: xtime_sec, ktime_sec ... tai_offset
cacheline 2: tkr_raw, raw_sec
cacheline 3: internal variables
cacheline 4: internal variables
That means ktime_get*() will access cacheline 0 + 1 and CLOCK_MONOTONIC_RAW
access will use cachelines 0 + 2.
Update kernel-doc and fix formatting issues while at it. Also fix a typo
in struct tk_read_base kernel-doc.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20241015100839.12702-1-anna-maria@linutronix.de
---
include/linux/timekeeper_internal.h | 106 ++++++++++++++++-----------
1 file changed, 65 insertions(+), 41 deletions(-)
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 902c20e..a3b6380 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -26,7 +26,7 @@
* occupies a single 64byte cache line.
*
* The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessors.
+ * for the fast NMI safe accessors.
*
* @base_real is for the fast NMI safe accessor to allow reading clock
* realtime from any context.
@@ -44,33 +44,41 @@ struct tk_read_base {
/**
* struct timekeeper - Structure holding internal timekeeping values.
- * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
- * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
- * @xtime_sec: Current CLOCK_REALTIME time in seconds
- * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
- * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
- * @offs_real: Offset clock monotonic -> clock realtime
- * @offs_boot: Offset clock monotonic -> clock boottime
- * @offs_tai: Offset clock monotonic -> clock tai
- * @tai_offset: The current UTC to TAI offset in seconds
- * @clock_was_set_seq: The sequence number of clock was set events
- * @cs_was_changed_seq: The sequence number of clocksource change events
- * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
- * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
- * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
- * @cycle_interval: Number of clock cycles in one NTP interval
- * @xtime_interval: Number of clock shifted nano seconds in one NTP
- * interval.
- * @xtime_remainder: Shifted nano seconds left over when rounding
- * @cycle_interval
- * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
- * @ntp_error: Difference between accumulated time and NTP time in ntp
- * shifted nano seconds.
- * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
- * ntp shifted nano seconds.
- * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
- * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
- * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
+ * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
+ * @xtime_sec: Current CLOCK_REALTIME time in seconds
+ * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
+ * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
+ * @offs_real: Offset clock monotonic -> clock realtime
+ * @offs_boot: Offset clock monotonic -> clock boottime
+ * @offs_tai: Offset clock monotonic -> clock tai
+ * @tai_offset: The current UTC to TAI offset in seconds
+ * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
+ * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
+ * @clock_was_set_seq: The sequence number of clock was set events
+ * @cs_was_changed_seq: The sequence number of clocksource change events
+ * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
+ * @cycle_interval: Number of clock cycles in one NTP interval
+ * @xtime_interval: Number of clock shifted nano seconds in one NTP
+ * interval.
+ * @xtime_remainder: Shifted nano seconds left over when rounding
+ * @cycle_interval
+ * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
+ * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
+ * @ntp_tick: The ntp_tick_length() value currently being
+ * used. This cached copy ensures we consistently
+ * apply the tick length for an entire tick, as
+ * ntp_tick_length may change mid-tick, and we don't
+ * want to apply that new value to the tick in
+ * progress.
+ * @ntp_error: Difference between accumulated time and NTP time in ntp
+ * shifted nano seconds.
+ * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
+ * ntp shifted nano seconds.
+ * @ntp_err_mult: Multiplication factor for scaled math conversion
+ * @skip_second_overflow: Flag used to avoid updating NTP twice with same second
+ * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
+ * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
+ * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
*
* Note: For timespec(64) based interfaces wall_to_monotonic is what
* we need to add to xtime (or xtime corrected for sub jiffy times)
@@ -88,10 +96,28 @@ struct tk_read_base {
*
* @monotonic_to_boottime is a timespec64 representation of @offs_boot to
* accelerate the VDSO update for CLOCK_BOOTTIME.
+ *
+ * The cacheline ordering of the structure is optimized for in kernel usage of
+ * the ktime_get() and ktime_get_ts64() family of time accessors. Struct
+ * timekeeper is prepended in the core timekeeping code with a sequence count,
+ * which results in the following cacheline layout:
+ *
+ * 0: seqcount, tkr_mono
+ * 1: xtime_sec ... tai_offset
+ * 2: tkr_raw, raw_sec
+ * 3,4: Internal variables
+ *
+ * Cacheline 0,1 contain the data which is used for accessing
+ * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the
+ * data for accessing CLOCK_MONOTONIC_RAW. Cacheline 3,4 are internal
+ * variables which are only accessed during timekeeper updates once per
+ * tick.
*/
struct timekeeper {
+ /* Cacheline 0 (together with prepended seqcount of timekeeper core): */
struct tk_read_base tkr_mono;
- struct tk_read_base tkr_raw;
+
+ /* Cacheline 1: */
u64 xtime_sec;
unsigned long ktime_sec;
struct timespec64 wall_to_monotonic;
@@ -99,31 +125,29 @@ struct timekeeper {
ktime_t offs_boot;
ktime_t offs_tai;
s32 tai_offset;
+
+ /* Cacheline 2: */
+ struct tk_read_base tkr_raw;
+ u64 raw_sec;
+
+ /* Cacheline 3 and 4 (timekeeping internal variables): */
unsigned int clock_was_set_seq;
u8 cs_was_changed_seq;
- ktime_t next_leap_ktime;
- u64 raw_sec;
+
struct timespec64 monotonic_to_boot;
- /* The following members are for timekeeping internal use */
u64 cycle_interval;
u64 xtime_interval;
s64 xtime_remainder;
u64 raw_interval;
- /* The ntp_tick_length() value currently being used.
- * This cached copy ensures we consistently apply the tick
- * length for an entire tick, as ntp_tick_length may change
- * mid-tick, and we don't want to apply that new value to
- * the tick in progress.
- */
+
+ ktime_t next_leap_ktime;
u64 ntp_tick;
- /* Difference between accumulated time and NTP time in ntp
- * shifted nano seconds. */
s64 ntp_error;
u32 ntp_error_shift;
u32 ntp_err_mult;
- /* Flag used to avoid updating NTP twice with same second */
u32 skip_second_overflow;
+
#ifdef CONFIG_DEBUG_TIMEKEEPING
long last_warning;
/*
The following commit has been merged into the timers/core branch of tip:
Commit-ID: 3b7b1f9c33bc25ace98ee5be766d0c8b23332c14
Gitweb: https://git.kernel.org/tip/3b7b1f9c33bc25ace98ee5be766d0c8b23332c14
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Tue, 15 Oct 2024 12:08:39 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Fri, 25 Oct 2024 16:41:11 +02:00
timekeeping: Reorder struct timekeeper
struct timekeeper is ordered suboptimally vs. cachelines. The layout,
including the preceding seqcount (see struct tk_core in timekeeper.c) is:
cacheline 0: seqcount, tkr_mono
cacheline 1: tkr_raw, xtime_sec
cacheline 2: ktime_sec ... tai_offset, internal variables
cacheline 3: next_leap_ktime, raw_sec, internal variables
cacheline 4: internal variables
So any access via ktime_get*() except for access to CLOCK_MONOTONIC_RAW
will use either cachelines 0 + 1 or cachelines 0 + 2. Access to
CLOCK_MONOTONIC_RAW uses cachelines 0 + 1 + 3.
Reorder the members so that the result is more efficient:
cacheline 0: seqcount, tkr_mono
cacheline 1: xtime_sec, ktime_sec ... tai_offset
cacheline 2: tkr_raw, raw_sec
cacheline 3: internal variables
cacheline 4: internal variables
That means ktime_get*() will access cacheline 0 + 1 and CLOCK_MONOTONIC_RAW
access will use cachelines 0 + 2.
Update kernel-doc and fix formatting issues while at it. Also fix a typo
in struct tk_read_base kernel-doc.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <jstultz@google.com>
Link: https://lore.kernel.org/all/20241015100839.12702-1-anna-maria@linutronix.de
---
include/linux/timekeeper_internal.h | 106 ++++++++++++++++-----------
1 file changed, 65 insertions(+), 41 deletions(-)
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 902c20e..a3b6380 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -26,7 +26,7 @@
* occupies a single 64byte cache line.
*
* The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessors.
+ * for the fast NMI safe accessors.
*
* @base_real is for the fast NMI safe accessor to allow reading clock
* realtime from any context.
@@ -44,33 +44,41 @@ struct tk_read_base {
/**
* struct timekeeper - Structure holding internal timekeeping values.
- * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
- * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
- * @xtime_sec: Current CLOCK_REALTIME time in seconds
- * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
- * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
- * @offs_real: Offset clock monotonic -> clock realtime
- * @offs_boot: Offset clock monotonic -> clock boottime
- * @offs_tai: Offset clock monotonic -> clock tai
- * @tai_offset: The current UTC to TAI offset in seconds
- * @clock_was_set_seq: The sequence number of clock was set events
- * @cs_was_changed_seq: The sequence number of clocksource change events
- * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
- * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
- * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
- * @cycle_interval: Number of clock cycles in one NTP interval
- * @xtime_interval: Number of clock shifted nano seconds in one NTP
- * interval.
- * @xtime_remainder: Shifted nano seconds left over when rounding
- * @cycle_interval
- * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
- * @ntp_error: Difference between accumulated time and NTP time in ntp
- * shifted nano seconds.
- * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
- * ntp shifted nano seconds.
- * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
- * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
- * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
+ * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
+ * @xtime_sec: Current CLOCK_REALTIME time in seconds
+ * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
+ * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
+ * @offs_real: Offset clock monotonic -> clock realtime
+ * @offs_boot: Offset clock monotonic -> clock boottime
+ * @offs_tai: Offset clock monotonic -> clock tai
+ * @tai_offset: The current UTC to TAI offset in seconds
+ * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
+ * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
+ * @clock_was_set_seq: The sequence number of clock was set events
+ * @cs_was_changed_seq: The sequence number of clocksource change events
+ * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
+ * @cycle_interval: Number of clock cycles in one NTP interval
+ * @xtime_interval: Number of clock shifted nano seconds in one NTP
+ * interval.
+ * @xtime_remainder: Shifted nano seconds left over when rounding
+ * @cycle_interval
+ * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
+ * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
+ * @ntp_tick: The ntp_tick_length() value currently being
+ * used. This cached copy ensures we consistently
+ * apply the tick length for an entire tick, as
+ * ntp_tick_length may change mid-tick, and we don't
+ * want to apply that new value to the tick in
+ * progress.
+ * @ntp_error: Difference between accumulated time and NTP time in ntp
+ * shifted nano seconds.
+ * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
+ * ntp shifted nano seconds.
+ * @ntp_err_mult: Multiplication factor for scaled math conversion
+ * @skip_second_overflow: Flag used to avoid updating NTP twice with same second
+ * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
+ * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
+ * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
*
* Note: For timespec(64) based interfaces wall_to_monotonic is what
* we need to add to xtime (or xtime corrected for sub jiffy times)
@@ -88,10 +96,28 @@ struct tk_read_base {
*
* @monotonic_to_boottime is a timespec64 representation of @offs_boot to
* accelerate the VDSO update for CLOCK_BOOTTIME.
+ *
+ * The cacheline ordering of the structure is optimized for in kernel usage of
+ * the ktime_get() and ktime_get_ts64() family of time accessors. Struct
+ * timekeeper is prepended in the core timekeeping code with a sequence count,
+ * which results in the following cacheline layout:
+ *
+ * 0: seqcount, tkr_mono
+ * 1: xtime_sec ... tai_offset
+ * 2: tkr_raw, raw_sec
+ * 3,4: Internal variables
+ *
+ * Cacheline 0,1 contain the data which is used for accessing
+ * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the
+ * data for accessing CLOCK_MONOTONIC_RAW. Cacheline 3,4 are internal
+ * variables which are only accessed during timekeeper updates once per
+ * tick.
*/
struct timekeeper {
+ /* Cacheline 0 (together with prepended seqcount of timekeeper core): */
struct tk_read_base tkr_mono;
- struct tk_read_base tkr_raw;
+
+ /* Cacheline 1: */
u64 xtime_sec;
unsigned long ktime_sec;
struct timespec64 wall_to_monotonic;
@@ -99,31 +125,29 @@ struct timekeeper {
ktime_t offs_boot;
ktime_t offs_tai;
s32 tai_offset;
+
+ /* Cacheline 2: */
+ struct tk_read_base tkr_raw;
+ u64 raw_sec;
+
+ /* Cacheline 3 and 4 (timekeeping internal variables): */
unsigned int clock_was_set_seq;
u8 cs_was_changed_seq;
- ktime_t next_leap_ktime;
- u64 raw_sec;
+
struct timespec64 monotonic_to_boot;
- /* The following members are for timekeeping internal use */
u64 cycle_interval;
u64 xtime_interval;
s64 xtime_remainder;
u64 raw_interval;
- /* The ntp_tick_length() value currently being used.
- * This cached copy ensures we consistently apply the tick
- * length for an entire tick, as ntp_tick_length may change
- * mid-tick, and we don't want to apply that new value to
- * the tick in progress.
- */
+
+ ktime_t next_leap_ktime;
u64 ntp_tick;
- /* Difference between accumulated time and NTP time in ntp
- * shifted nano seconds. */
s64 ntp_error;
u32 ntp_error_shift;
u32 ntp_err_mult;
- /* Flag used to avoid updating NTP twice with same second */
u32 skip_second_overflow;
+
#ifdef CONFIG_DEBUG_TIMEKEEPING
long last_warning;
/*
© 2016 - 2024 Red Hat, Inc.