From nobody Sun May 24 23:30:58 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D311F3E2AB0 for ; Wed, 20 May 2026 13:52:34 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285156; cv=none; b=kYdcHOP7JV4wgn8DUGMuVet7ofmDM3X0GKe9806bGsSzSyIHv3kAWXSo80HZkkdJj+l3eWM/SaoN0KM1IjRb68dpbcw+Djr5BUakSCGaS1hJouYywEoGMA0MsKEcmF2SN/g2HnC5SuqT8CZ7aciJFGftSLAwWEKZJAS/e6N05kM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285156; c=relaxed/simple; bh=5ZhK+vVY2l/9U6FmN/GzCYwtoHlNX8/MXAFrwrPH69g=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=VoY1k6p09/Hq6lGBO08DkuuUjErkSdz8ROfa/xVokW9IoAmf3ZkidsDnzc7mXnUH3ar/hLvuq7YdwSrBgYO3WAfKLUEkBywDJy7lKmF23aCRL31v1VVk9mu4dQ4LfnbnrFM58N9WXdDoMSWeVQVcEukUaHeDThakhmjEEgUzWmw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=desiato.srs.infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=o4Cz0pNE; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=desiato.srs.infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="o4Cz0pNE" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Sender:Content-Transfer-Encoding: MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From: Reply-To:Content-Type:Content-ID:Content-Description; 
bh=7Z8W0OMj4/tPFDjolIGcHfJEcbOy/bbIlYsOAn2ejhI=; b=o4Cz0pNEP3feneWQeT96hO1Qnz pjvS/O7XWMdShBZLi1kacjO0K3Aw914A2BmuESg/KXaAa0rBkma5o2kX0P44qmvGwASYJJwqXhq1a h8dkqdVkDwHQQb8xSZktXtqtgmPe7V9rj/2NYjC1TxXzxRbd4vNgqCiLQK2xdfqeOFMeDp4GKcXx4 T2C4jnduSYipgFp6sX8m5sFJK8WXNTYBjZ1bGSHzAL1uXlRDCC5LhVMuR3MXwDFm6acZ2pZaGy/Vz YaR1J1i1QKmnJt0fYRiwERbPgX+SKNJso6oCKezl5tf4RocdKlehv1atN82x2S6pzj936eQ8y8Xrw Wv5uU+9Q==; Received: from i7.infradead.org ([2001:8b0:10b:1:21e:67ff:fecb:7a92]) by desiato.infradead.org with esmtpsa (Exim 4.99.1 #2 (Red Hat Linux)) id 1wPhLF-0000000GzA1-3ZY4; Wed, 20 May 2026 13:52:10 +0000 Received: from dwoodhou by i7.infradead.org with local (Exim 4.99.2 #2 (Red Hat Linux)) id 1wPhLF-000000009tH-1gr7; Wed, 20 May 2026 14:52:09 +0100 From: David Woodhouse To: Richard Cochran , Wen Gu , David Woodhouse , Andrew Lunn , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , John Stultz , Thomas Gleixner , Stephen Boyd , Anna-Maria Behnsen , Frederic Weisbecker , Shuah Khan , Peter Zijlstra , =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= , Arnd Bergmann , Miroslav Lichvar , Julien Ridoux , Ryan Luu , linux-kernel@vger.kernel.org Cc: David Woodhouse Subject: [RFC PATCH v3 01/10] MAINTAINERS: Add Miroslav as timekeeping reviewer Date: Wed, 20 May 2026 14:33:40 +0100 Message-ID: <20260520135207.37826-2-dwmw2@infradead.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260520135207.37826-1-dwmw2@infradead.org> References: <20260520135207.37826-1-dwmw2@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Sender: David Woodhouse X-SRS-Rewrite: SMTP reverse-path rewritten from by desiato.infradead.org. 
See http://www.infradead.org/rpr.html Content-Type: text/plain; charset="utf-8" From: David Woodhouse If Thomas is going to nudge me on IRC to add Miroslav to Cc on timekeeping patches, then he might as well actually be listed in the MAINTAINERS file. Signed-off-by: David Woodhouse Acked-by: John Stultz --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7b49c5fa7054..fefc2b007b87 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -26892,6 +26892,7 @@ TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER M: John Stultz M: Thomas Gleixner R: Stephen Boyd +R: Miroslav Lichvar L: linux-kernel@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/co= re --=20 2.54.0 From nobody Sun May 24 23:30:58 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AC5853E2ABF for ; Wed, 20 May 2026 13:52:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285154; cv=none; b=VLeuqPNY5tNRSez2WenGRgg5tc2vYWXTfKDFs+Vi5BvfnGw3bhy9ihv6u7UjZ1zllXQGF+XmN0Us/vsc3U4BTwOXUhTecN6CbHfM2IF1BrVxolxysXrWyRj/9GBx+f5FG2xLAfrtNlqeMgTaCB01RQgnpJjEbdiBmkKBbUE2PCA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285154; c=relaxed/simple; bh=elZZytGWoxchOMsQRajf9pPz2TD4QUN75F28hVqnwQY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=GlaP4dvIyw1bmz7LOJnq2Df6iNiHoE3VJg41f+1zk+dtLzcz/pY7VBICfGCgGnkg4tXEJDwF5PcQ7gExpw1gATS7DQ2Ph0lgBOdcEGbHpg34Lxv8UWKWYj9zohb4dMnFdy0QfMBYGbTAXGhWJobk/2lhVxObRHynxgm4MXOi++g= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none 
smtp.mailfrom=desiato.srs.infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=T0gaY4C6; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=desiato.srs.infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="T0gaY4C6" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Sender:Content-Transfer-Encoding: MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From: Reply-To:Content-Type:Content-ID:Content-Description; bh=loyO5FKM9whCWGZYzcNDlJKIKuqvDbYyNAGZZlLdonU=; b=T0gaY4C6kd7gD8QISSPQvnJrDF YMULasnRURHSYKDRtoIrbLw/ZUF6DD7GehWAhuDBOAV0uEk1UGXJ4jbFyggOY0TY2v3oGFlgxDynS glHCK4whKRavjdZg0k0XzNV3uVe6aEyDzJV5/Lu+H80MoBF0Gq+5UP2sSWZ0fH3T7AoS0711dTYJV kx2oh9uv03bh0cj+j6SOk727Zcdogtq3jN/ZfYkv/f693K2g+TFEOP9M1D+HScwekNCmXvqCnre9n bcSj/JXiFWWCWc6YOeQ4c5SSoRs+vyKulc78ILvv6w7JXB7Fmk16UHcme9+jyDwGEDS7N/Tkql6E9 uXDbdY8A==; Received: from i7.infradead.org ([2001:8b0:10b:1:21e:67ff:fecb:7a92]) by desiato.infradead.org with esmtpsa (Exim 4.99.1 #2 (Red Hat Linux)) id 1wPhLF-0000000GzA2-3Zs7; Wed, 20 May 2026 13:52:10 +0000 Received: from dwoodhou by i7.infradead.org with local (Exim 4.99.2 #2 (Red Hat Linux)) id 1wPhLF-000000009tK-2243; Wed, 20 May 2026 14:52:09 +0100 From: David Woodhouse To: Richard Cochran , Wen Gu , David Woodhouse , Andrew Lunn , "David S. 
Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , John Stultz , Thomas Gleixner , Stephen Boyd , Anna-Maria Behnsen , Frederic Weisbecker , Shuah Khan , Peter Zijlstra , =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= , Arnd Bergmann , Miroslav Lichvar , Julien Ridoux , Ryan Luu , linux-kernel@vger.kernel.org Cc: David Woodhouse Subject: [RFC PATCH v3 02/10] timekeeping: Remove xtime_remainder from ntp_error accumulation Date: Wed, 20 May 2026 14:33:41 +0100 Message-ID: <20260520135207.37826-3-dwmw2@infradead.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260520135207.37826-1-dwmw2@infradead.org> References: <20260520135207.37826-1-dwmw2@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Sender: David Woodhouse X-SRS-Rewrite: SMTP reverse-path rewritten from by desiato.infradead.org. See http://www.infradead.org/rpr.html Content-Type: text/plain; charset="utf-8" From: David Woodhouse The ntp_error accumulator tracks the difference between the time actually reported to consumers in xtime, and the *intended* time. The former is subject to a sawtooth effect due to the quantisation of 'mult', which means that it actually advances by 'xtime_interval' each tick, while the intended clock advances by 'ntp_tick'. By dithering between adjacent integer values of 'mult' which result in an 'xtime_interval' slightly higher/lower than the intended tick length, the advancement of xtime is kept on average to the intended rate. The accounting should therefore adjust ntp_error by adding ntp_tick and subtracting xtime_interval on each tick. Since commit a386b5af8edd ("time: Compensate for rounding on odd-frequency clocksources") the value subtracted has been (xtime_interval + xtime_remainder), which is wrong. The effect is a systematic drift whose magnitude depends on the value of xtime_remainder and the NTP frequency correction. 
NTP masks this by continuously adjusting the frequency to compensate, but with a fixed frequency (or an external reference clock like vmclock), the drift is exposed. The value of xtime_remainder actually does represent the difference between the tick period and xtime_interval, so simply adding it instead of (+ tick length - xtime_remainder) might have made sense... except that it's only calculated once at boot time, so it's inaccurate anyway. So just kill it with fire. Also remove it from the mult computation in timekeeping_adjust(), which used it to offset the division for the same (incorrect) reason. Fixes: a386b5af8edd ("time: Compensate for rounding on odd-frequency clocks= ources") Signed-off-by: David Woodhouse Assisted-by: Kiro:claude-opus-4.6-1m --- include/linux/timekeeper_internal.h | 3 --- kernel/time/timekeeping.c | 8 +++----- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper= _internal.h index e36d11e33e0c..da6cf383bedc 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -84,8 +84,6 @@ struct tk_read_base { * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP * interval. - * @xtime_remainder: Shifted nano seconds left over when rounding - * @cycle_interval * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. 
* @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second * @ntp_tick: The ntp_tick_length() value currently being @@ -178,7 +176,6 @@ struct timekeeper { =20 u64 cycle_interval; u64 xtime_interval; - s64 xtime_remainder; u64 raw_interval; =20 ktime_t next_leap_ktime; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c493a4010305..b84b05f9d460 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -360,7 +360,6 @@ static void tk_setup_internals(struct timekeeper *tk, s= truct clocksource *clock) =20 /* Go back from cycles -> shifted ns */ tk->xtime_interval =3D interval * clock->mult; - tk->xtime_remainder =3D ntpinterval - tk->xtime_interval; tk->raw_interval =3D interval * clock->mult; =20 /* if changing clocks, convert xtime_nsec shift units */ @@ -2337,8 +2336,8 @@ static void timekeeping_adjust(struct timekeeper *tk,= s64 offset) mult =3D tk->tkr_mono.mult - tk->ntp_err_mult; } else { tk->ntp_tick =3D ntp_tl; - mult =3D div64_u64((tk->ntp_tick >> tk->ntp_error_shift) - - tk->xtime_remainder, tk->cycle_interval); + mult =3D div64_u64(tk->ntp_tick >> tk->ntp_error_shift, + tk->cycle_interval); } =20 /* @@ -2463,8 +2462,7 @@ static u64 logarithmic_accumulation(struct timekeeper= *tk, u64 offset, =20 /* Accumulate error between NTP and clock interval */ tk->ntp_error +=3D tk->ntp_tick << shift; - tk->ntp_error -=3D (tk->xtime_interval + tk->xtime_remainder) << - (tk->ntp_error_shift + shift); + tk->ntp_error -=3D tk->xtime_interval << (tk->ntp_error_shift + shift); =20 return offset; } --=20 2.54.0 From nobody Sun May 24 23:30:58 2026 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D310439BFE6 for ; Wed, 20 May 2026 13:52:24 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285148; cv=none; b=e6dM13NQP82rpip8SFt4Zpo7YsU/2J9XbUifd13gmEhy6HEC4iQQabathtdM6lX6rPPTGOlATzfkhrXMNyTUA5J8TOohuoWEn8m4VgzAtBcwwA6OcyYfnCUQo9NRk/+b4QgHXZI9A7o6QgIIZz9+RGT39QDikB1/7lpd2fuAEpY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285148; c=relaxed/simple; bh=x2EDSdakPJhC6So/XLDvqzuzR3btxiKD/OiQ/5RL+Fo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=MDYOWsEo/TAZBxZzWxSnoLvr/CUkCdvqxKeX3F+u4IZCy/FG3FB/n9M7/jNQyt7DZ5UvfhBxdFY8o12l9Mt7VPE1A/wgn7E2xyItTzGpJQO1tB1Ve+fpaYcUVc3uBidt+8PK0mRcDegoPXEMBpZIiyZT1QoSnfo2qpuI/E86OY4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=casper.srs.infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=k1vmDGCn; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=casper.srs.infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="k1vmDGCn" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; h=Sender:Content-Transfer-Encoding: Content-Type:MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc: To:From:Reply-To:Content-ID:Content-Description; bh=Oa47mKK9GRk5UqeFd7JbZkXnPSyG8Wc01sGke8jD9tU=; b=k1vmDGCn/QLQFBwJy7kcF67Gq8 DlyNBkvmZBOIRC3t4A7M/nnurKydThvKzFsXTv0ctYU9uWBKui377G7qemCzN9zpr03ior8xt8mJ8 +ChwImng3VT8/4VLjhVbHksDNtJ9kELTYPfsgNAWU1GQBd2pEP8V696Ih6Fkb1IOd2mPT/aopy+9e v72dN/oEnZcEKUmNCu1SV/hFJSzQ8G2OfNaudMD25u+nLTlcG197bNj3gx4BwQhM440hRRdKVqvuu lqrnaYpzUS8a0go0CBMeaZUKC5LpCJECfaqrTeQjSVEMMSA7vYzAqsbzAOig/ows8VMjlGBjwMZPO ZFi9nkWA==; 
Received: from i7.infradead.org ([2001:8b0:10b:1:21e:67ff:fecb:7a92]) by casper.infradead.org with esmtpsa (Exim 4.99.1 #2 (Red Hat Linux)) id 1wPhLG-00000007DRp-0VqL; Wed, 20 May 2026 13:52:10 +0000 Received: from dwoodhou by i7.infradead.org with local (Exim 4.99.2 #2 (Red Hat Linux)) id 1wPhLF-000000009tN-2Hbd; Wed, 20 May 2026 14:52:09 +0100 From: David Woodhouse To: Richard Cochran , Wen Gu , David Woodhouse , Andrew Lunn , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , John Stultz , Thomas Gleixner , Stephen Boyd , Anna-Maria Behnsen , Frederic Weisbecker , Shuah Khan , Peter Zijlstra , =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= , Arnd Bergmann , Miroslav Lichvar , Julien Ridoux , Ryan Luu , linux-kernel@vger.kernel.org Cc: David Woodhouse Subject: [RFC PATCH v3 03/10] timekeeping: Account for monotonicity adjustment in ntp_error Date: Wed, 20 May 2026 14:33:42 +0100 Message-ID: <20260520135207.37826-4-dwmw2@infradead.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260520135207.37826-1-dwmw2@infradead.org> References: <20260520135207.37826-1-dwmw2@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Sender: David Woodhouse X-SRS-Rewrite: SMTP reverse-path rewritten from by casper.infradead.org. See http://www.infradead.org/rpr.html From: David Woodhouse timekeeping_apply_adjustment() modifies xtime_nsec to maintain vDSO monotonicity when mult changes: xtime_nsec -=3D offset This ensures that the time reported to userspace does not jump when the multiplier is adjusted from one tick to the next. However, the ntp_error accumulator which tracks the difference between intended and actual clock position was not being updated to reflect this additional discrepancy. 
An earlier attempt at this compensation existed as: ntp_error -=3D (interval - offset) << ntp_error_shift but was removed in commit c2cda2a5bda9 ("timekeeping/ntp: Don't align NTP frequency adjustments to ticks") because it was a major source of NTP error. That's because (interval - offset) was wrong: the subtraction of "interval" prematurely accounted for the changed xtime_interval of the next tick, which would be correctly accounted in the next accumulation anyway =E2=80=94 a double subtraction. What is actually needed is just the "offset" part: ntp_error must be told that xtime_nsec moved by "offset" without a corresponding change in the intended position. For the normal =C2=B11 mult dithering this is negligible (the adjustments cancel over time), but for larger mult changes =E2=80=94 such as when an external reference clock sets a new frequency =E2=80=94 the one-time uncompensated offset is significant. Fix by adjusting ntp_error by the correct amount: ntp_error +=3D offset << ntp_error_shift This keeps ntp_error consistent with the actual xtime_nsec position after the adjustment. 
Fixes: c2cda2a5bda9 ("timekeeping/ntp: Don't align NTP frequency adjustment= s to ticks") Signed-off-by: David Woodhouse Assisted-by: Kiro:claude-opus-4.6-1m Acked-by: John Stultz --- kernel/time/timekeeping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b84b05f9d460..95973e45d456 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2317,6 +2317,7 @@ static __always_inline void timekeeping_apply_adjustm= ent(struct timekeeper *tk, tk->tkr_mono.mult +=3D mult_adj; tk->xtime_interval +=3D interval; tk->tkr_mono.xtime_nsec -=3D offset; + tk->ntp_error +=3D offset << tk->ntp_error_shift; } =20 /* --=20 2.54.0 From nobody Sun May 24 23:30:58 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AC8293E3140 for ; Wed, 20 May 2026 13:52:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285155; cv=none; b=YOkWspffI5IxBeaGnxYzuE4Hca4HNjyvy4EUIfrLGbIjzoU2YmY5iLkieoLkDeVT4VDI0CcNss7JhP/ZnEUo/LiTPZLKw7HynBy/yHEIl421wu+d11DrEYJTjHadTdgDkPpjz/WwnRx73dfbJ8h3JKzv0bOUYSaT7BWC8xtXTbw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285155; c=relaxed/simple; bh=8P3QXodJnUM+lW8e+syZNYOapKB9rtZDgllWY+gd5zU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=PhgRllUxix8iFbhPbJ4PAfeH1Zq3B5OCVBeO3nvdwms+At7AGrRWfDZLjmE5ClehzR53QDsuv+AA2BL9Ur0HddQY0OV0w7PJz4HpLRjD27uqsMk2Tedg+eQNb4OwN7jjo8u7jegWUrAB9xjrAJDs0lkZFhd18hTPVTymseD4AJc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=desiato.srs.infradead.org; dkim=pass (2048-bit key) 
header.d=infradead.org header.i=@infradead.org header.b=iFvZG3Oe; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=desiato.srs.infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="iFvZG3Oe" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Sender:Content-Transfer-Encoding: MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From: Reply-To:Content-Type:Content-ID:Content-Description; bh=sZtfa5e11apFou1TJeCmQDi4iVZgL0Y74MyLZe6af2g=; b=iFvZG3OeTWvx2S0w+JLfeQ3tpQ kf8VwrvxgFd80foRDD1SvQ+aVypOoMdFI4tLpFwuKTsCMqJ3kEKdv8zPh9C+H/qWTc6LeJxOCsjCA ubTDnh8jjqqZHFFKuXMrvEA1llwO4MXrfI8PbgS27TMhl3XHflmWdtl+eFuqcQ1LlWfcVL0TEVptB Hu8K0M66k2I9AxrTj0dCSw5DjLdRDpgnbTAHCFXAowtYedsGwe9DBRHahzG/NjZPCaqkxZz5uXu7b qw3doRt08vGN8F0k0FmKakAz4UuzshIaNTJ5KGeeIKf1lDWgHByh5L6hbTWI3K6Z/EclH1CV1ZWfL BE/Q8ZfQ==; Received: from i7.infradead.org ([2001:8b0:10b:1:21e:67ff:fecb:7a92]) by desiato.infradead.org with esmtpsa (Exim 4.99.1 #2 (Red Hat Linux)) id 1wPhLF-0000000Gz9x-3Xqv; Wed, 20 May 2026 13:52:10 +0000 Received: from dwoodhou by i7.infradead.org with local (Exim 4.99.2 #2 (Red Hat Linux)) id 1wPhLF-000000009tQ-2c2z; Wed, 20 May 2026 14:52:09 +0100 From: David Woodhouse To: Richard Cochran , Wen Gu , David Woodhouse , Andrew Lunn , "David S. 
Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , John Stultz , Thomas Gleixner , Stephen Boyd , Anna-Maria Behnsen , Frederic Weisbecker , Shuah Khan , Peter Zijlstra , =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= , Arnd Bergmann , Miroslav Lichvar , Julien Ridoux , Ryan Luu , linux-kernel@vger.kernel.org Cc: David Woodhouse Subject: [RFC PATCH v3 04/10] timekeeping: Guard against divide-by-zero in timekeeping_adjust Date: Wed, 20 May 2026 14:33:43 +0100 Message-ID: <20260520135207.37826-5-dwmw2@infradead.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260520135207.37826-1-dwmw2@infradead.org> References: <20260520135207.37826-1-dwmw2@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Sender: David Woodhouse X-SRS-Rewrite: SMTP reverse-path rewritten from by desiato.infradead.org. See http://www.infradead.org/rpr.html Content-Type: text/plain; charset="utf-8" From: David Woodhouse When the TSC clocksource is recalibrated (e.g. on KVM guests with clocksource=3Dtsc), cycle_interval can momentarily be zero during the transition. Guard the div64_u64 in timekeeping_adjust() to prevent a divide-by-zero oops. This can be triggered on KVM guests that force clocksource=3Dtsc when the guest's measured TSC frequency doesn't match what KVM reported, causing a recalibration during boot. 
Signed-off-by: David Woodhouse Assisted-by: Kiro:claude-opus-4.6-1m Acked-by: John Stultz --- kernel/time/timekeeping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 95973e45d456..0d5faa5d13d7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2336,6 +2336,8 @@ static void timekeeping_adjust(struct timekeeper *tk,= s64 offset) if (likely(tk->ntp_tick =3D=3D ntp_tl)) { mult =3D tk->tkr_mono.mult - tk->ntp_err_mult; } else { + if (unlikely(!tk->cycle_interval)) + return; tk->ntp_tick =3D ntp_tl; mult =3D div64_u64(tk->ntp_tick >> tk->ntp_error_shift, tk->cycle_interval); --=20 2.54.0 From nobody Sun May 24 23:30:58 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AC7913E2ADD for ; Wed, 20 May 2026 13:52:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285155; cv=none; b=iEIplMzU4tNVuyR2DEuBw7L/CdQS/5fZGc6i30ISZef87rxeDJikILzJ23etJcnggIYggwA2yrkNvg6y4JlsAmUzF3wUJ2SDxspmiS3t11nai9k+fpkl+0BiXhU5nvL9VB+CyBZ5PtAOckESoyqFKf8K3om0+rBTHoG/g8PHEm0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285155; c=relaxed/simple; bh=VPmfuImtbJBZlEQ0jY3Q/6KifrEXvNezS2EF8x9uq4Y=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=BBVO+nM0JaqKS30Bgu7OrDnlF5LciGaxAETEUHQQTrYgbbKaUYp+acp9J/Nd1XY6VqtDMMKPfQQgUNOvHKOUrAuwE52Ft/WsG8Qnl0dmQuyL+hDgKgVR2QbNrbQI+3cUZ/1DzWM67UK9Z20jKGadh/IXcYuPfPOBpjIlQdiQKLg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=desiato.srs.infradead.org; dkim=pass (2048-bit key) header.d=infradead.org 
header.i=@infradead.org header.b=mTnp+Nps; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=desiato.srs.infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="mTnp+Nps" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Sender:Content-Transfer-Encoding: Content-Type:MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc: To:From:Reply-To:Content-ID:Content-Description; bh=otb6VbKJV4fUNrCvU5G6qkdh4myIziSqkwmi0Pt0gX8=; b=mTnp+NpsnmnZu7D2rJ5z+AIzps h0rLbQLN50W3UT86iCW6FqQiXUGLlTPB9zvPw0k2xP9JSpmc7D2Lw39ia9WbyFR6TnmESNsgdtqIN 6p1P+uV/Kyw/0O9EssTZcwc1H3y4i7dbITK+wrnmviebGC5JEhu3ApHMFh24+IpCcTYk+hxIt3dBE Yfbv33Nm37PKQ6bX0ToLQH5qfvwahlFlNtPE4Yi5slZ+hfzlVZU3TqW2XlEVpMiQqmXkB2hOR5Yh6 oDAZTEkIhTY0IAqqj++oU+obJQ7i0/f2hnJnlZ0FxNd0+orSP9882ICwbfBR7bJphk8hhZX4zfjZS h10+OvuQ==; Received: from i7.infradead.org ([2001:8b0:10b:1:21e:67ff:fecb:7a92]) by desiato.infradead.org with esmtpsa (Exim 4.99.1 #2 (Red Hat Linux)) id 1wPhLF-0000000Gz9y-3Y2R; Wed, 20 May 2026 13:52:10 +0000 Received: from dwoodhou by i7.infradead.org with local (Exim 4.99.2 #2 (Red Hat Linux)) id 1wPhLF-000000009tT-2wpp; Wed, 20 May 2026 14:52:09 +0100 From: David Woodhouse To: Richard Cochran , Wen Gu , David Woodhouse , Andrew Lunn , "David S. 
Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , John Stultz , Thomas Gleixner , Stephen Boyd , Anna-Maria Behnsen , Frederic Weisbecker , Shuah Khan , Peter Zijlstra , =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= , Arnd Bergmann , Miroslav Lichvar , Julien Ridoux , Ryan Luu , linux-kernel@vger.kernel.org Cc: David Woodhouse Subject: [RFC PATCH v3 05/10] timekeeping: Drive time_offset skew via per-tick ntp_error transfer Date: Wed, 20 May 2026 14:33:44 +0100 Message-ID: <20260520135207.37826-6-dwmw2@infradead.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260520135207.37826-1-dwmw2@infradead.org> References: <20260520135207.37826-1-dwmw2@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Sender: David Woodhouse X-SRS-Rewrite: SMTP reverse-path rewritten from by desiato.infradead.org. See http://www.infradead.org/rpr.html From: David Woodhouse Instead of inflating tick_length to effect the time_offset slew, transfer the skew to ntp_error per-tick and drain time_offset at the equivalent per-tick rate: - ntp_error +=3D skew_delta << shift (biases dithering to deliver skew) - time_offset -=3D skew_delta / NTP_INTERVAL_FREQ (per-tick drain) Compute mult from (ntp_tick + skew_delta) so the dithering has enough bandwidth to deliver the skew rate by selecting between mult and mult+1. This is equivalent to the old tick_length +=3D delta approach but without modifying tick_length, and with exact per-tick accounting of the time_offset drain. To eliminate remainder error in the per-tick division, skew_delta is rounded to a multiple of NTP_INTERVAL_FREQ in second_overflow(). second_overflow() computes skew_delta (the exponential decay rate) but no longer drains time_offset or inflates tick_length directly. 
Signed-off-by: David Woodhouse Assisted-by: Kiro:claude-opus-4.6-1m --- include/linux/timekeeper_internal.h | 1 + kernel/time/ntp.c | 35 +++++++++++++++++++++++++++-- kernel/time/ntp_internal.h | 2 ++ kernel/time/timekeeping.c | 29 +++++++++++++++++++----- 4 files changed, 60 insertions(+), 7 deletions(-) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper= _internal.h index da6cf383bedc..9de6b5b94dc0 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -184,6 +184,7 @@ struct timekeeper { u32 ntp_error_shift; u32 ntp_err_mult; u32 skip_second_overflow; + s64 skew_delta; s32 tai_offset; }; =20 diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 97fa99b96dd0..87f3f5d0d13d 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -63,6 +63,7 @@ struct ntp_data { int time_state; int time_status; s64 time_offset; + s64 skew_delta; long time_constant; long time_maxerror; long time_esterror; @@ -364,6 +365,31 @@ u64 ntp_tick_length(unsigned int tkid) return tk_ntp_data[tkid].tick_length; } =20 +s64 ntp_get_skew_delta(unsigned int tkid) +{ + return tk_ntp_data[tkid].skew_delta; +} + +s64 ntp_drain_time_offset(unsigned int tkid, s64 amount) +{ + struct ntp_data *ntpdata =3D &tk_ntp_data[tkid]; + + /* Only drain if amount and time_offset have the same sign */ + if (!amount || (amount > 0) !=3D (ntpdata->time_offset > 0)) + return amount; + + /* Clamp: don't overshoot zero */ + if (abs(amount) > abs(ntpdata->time_offset)) { + s64 undrained =3D amount - ntpdata->time_offset; + + ntpdata->time_offset =3D 0; + return undrained; + } + + ntpdata->time_offset -=3D amount; + return 0; +} + /** * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime= _t * @tkid: Timekeeper ID @@ -460,9 +486,14 @@ int second_overflow(unsigned int tkid, time64_t secs) /* Compute the phase adjustment for the next second */ ntpdata->tick_length =3D ntpdata->tick_length_base; =20 + /* + * Set the per-tick skew rate 
for the tick code. This is in the + * same units as tick_length (ns << NTP_SCALE_SHIFT), and is + * rounded to a multiple of NTP_INTERVAL_FREQ so that the per-tick + * division in the tick code is exact. + */ delta =3D ntp_offset_chunk(ntpdata, ntpdata->time_offset); - ntpdata->time_offset -=3D delta; - ntpdata->tick_length +=3D delta; + ntpdata->skew_delta =3D delta - delta % NTP_INTERVAL_FREQ; =20 /* Check PPS signal */ pps_dec_valid(ntpdata); diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 7084d839c207..05e5dd5e1b70 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -6,6 +6,8 @@ extern void ntp_init(void); extern void ntp_clear(unsigned int tkid); /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ extern u64 ntp_tick_length(unsigned int tkid); +extern s64 ntp_get_skew_delta(unsigned int tkid); +extern s64 ntp_drain_time_offset(unsigned int tkid, s64 amount); extern ktime_t ntp_get_next_leap(unsigned int tkid); extern int second_overflow(unsigned int tkid, time64_t secs); extern int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, con= st struct timespec64 *ts, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 0d5faa5d13d7..27b2a093b138 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2327,20 +2327,23 @@ static __always_inline void timekeeping_apply_adjus= tment(struct timekeeper *tk, static void timekeeping_adjust(struct timekeeper *tk, s64 offset) { u64 ntp_tl =3D ntp_tick_length(tk->id); + s64 skew =3D ntp_get_skew_delta(tk->id); u32 mult; =20 /* - * Determine the multiplier from the current NTP tick length. - * Avoid expensive division when the tick length doesn't change. + * Determine the multiplier from the current NTP tick length plus + * skew_delta. The skew biases mult so that =C2=B11 dithering can deliver + * the time_offset slew rate. Recompute when either changes. 
*/ - if (likely(tk->ntp_tick =3D=3D ntp_tl)) { + if (likely(tk->ntp_tick =3D=3D ntp_tl && tk->skew_delta =3D=3D skew)) { mult =3D tk->tkr_mono.mult - tk->ntp_err_mult; } else { if (unlikely(!tk->cycle_interval)) return; tk->ntp_tick =3D ntp_tl; - mult =3D div64_u64(tk->ntp_tick >> tk->ntp_error_shift, - tk->cycle_interval); + tk->skew_delta =3D skew; + mult =3D div64_u64((tk->ntp_tick + skew) >> tk->ntp_error_shift, + tk->cycle_interval); } =20 /* @@ -2467,6 +2470,22 @@ static u64 logarithmic_accumulation(struct timekeepe= r *tk, u64 offset, tk->ntp_error +=3D tk->ntp_tick << shift; tk->ntp_error -=3D tk->xtime_interval << (tk->ntp_error_shift + shift); =20 + /* + * During clock skew driven by ntpdata->time_offset, transfer a + * *portion* of the requested total delta into ntp_error from + * time_offset each tick. The second_overflow() function sets + * the rate of skew, and the value of 'mult' has been selected + * in order to allow the dithering to keep ntp_error around zero + * even while this adjustment is being applied. 
+ */ + if (tk->skew_delta) { + s64 drain =3D div_s64(tk->skew_delta << shift, + NTP_INTERVAL_FREQ); + + tk->ntp_error +=3D (tk->skew_delta << shift) - + ntp_drain_time_offset(tk->id, drain); + } + return offset; } =20 --=20 2.54.0 From nobody Sun May 24 23:30:58 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AC6E23E2AD4 for ; Wed, 20 May 2026 13:52:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285156; cv=none; b=fDT2+nNrfxIrUDPkNhQ5btaUjQOjVqegkbInX89cZulNIRIIp2tNadj+FuWagzodX9bruUX/EinUkOGDBf/n91PCK7fE8GPjEmUlnq8LU6D6okHfB8G06xdhAwvpeJz7xC4xiLJ0+n/mQNkil7pWxKJeojQ8dgLUk1L+UccdN6g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285156; c=relaxed/simple; bh=rJ7atr485VQoVPSyI3mvyEaIiBNtnrM2JzKw7P0XWnI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=gyxkDYmCgDWWHJAYb9VorANZ7nTqiyPOmSo2vJvmVfkjeQrsdEGUGdG4F+Jd56sjAW+J+wLpBLY9soFOnrllZLNjYN4WI4uHypIE9cyX5l5Ght5/gU+mzoP1uPIRYF/1pOgi+BhsCRIdO7R0B5cd46sItmauYz4MxCYuEthetxE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=desiato.srs.infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=qXtyiKpL; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=desiato.srs.infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="qXtyiKpL" DKIM-Signature: v=1; 
a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Sender:Content-Transfer-Encoding: Content-Type:MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc: To:From:Reply-To:Content-ID:Content-Description; bh=xZJUJ4kXkweYJWr6wFjdyibEBFaktS6q92k69LmkIq4=; b=qXtyiKpLuUOFniOCzzmE18xFCS ghQyw1H931MbyWVIEJLMsNO/eBYAhagttSkXUXCHn1QuxQrF009022hvd619TIwTPM1AJ3GpFhNK5 37ed3rTJEQUVjyzDECA5pYkxN0kIkzEC9QgP7d2Eq60HtxXSyX4KK65QCmkeuaJ66TWcCEbmS5Ef2 5UG8yg5T8pWCkHfuP3MVWwt+DFsrpwylpxmhJnuJaZTeXmFe2ZcTIRfT17pmeJ++fe3LCI2BzP+9i TKMe2sijzBTL69FFSVVD3HRLd+eVKAJgynmXpQVP7KUhiTI7UWARM7eHPFafM4zWowX7P/6k0H57w QoFL63uA==; Received: from i7.infradead.org ([2001:8b0:10b:1:21e:67ff:fecb:7a92]) by desiato.infradead.org with esmtpsa (Exim 4.99.1 #2 (Red Hat Linux)) id 1wPhLF-0000000GzA0-3Xqu; Wed, 20 May 2026 13:52:10 +0000 Received: from dwoodhou by i7.infradead.org with local (Exim 4.99.2 #2 (Red Hat Linux)) id 1wPhLF-000000009tW-36qZ; Wed, 20 May 2026 14:52:09 +0100 From: David Woodhouse To: Richard Cochran , Wen Gu , David Woodhouse , Andrew Lunn , "David S. 
Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , John Stultz , Thomas Gleixner , Stephen Boyd , Anna-Maria Behnsen , Frederic Weisbecker , Shuah Khan , Peter Zijlstra , =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= , Arnd Bergmann , Miroslav Lichvar , Julien Ridoux , Ryan Luu , linux-kernel@vger.kernel.org Cc: David Woodhouse Subject: [RFC PATCH v3 06/10] ntp: Convert adjtime() to use time_offset instead of tick_length inflation Date: Wed, 20 May 2026 14:33:45 +0100 Message-ID: <20260520135207.37826-7-dwmw2@infradead.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260520135207.37826-1-dwmw2@infradead.org> References: <20260520135207.37826-1-dwmw2@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Sender: David Woodhouse X-SRS-Rewrite: SMTP reverse-path rewritten from by desiato.infradead.org. See http://www.infradead.org/rpr.html From: David Woodhouse The legacy adjtime() syscall (ADJ_ADJTIME/ADJ_OFFSET_SINGLESHOT) used to slew the clock by inflating tick_length directly via time_adjust. This was the last remaining user of tick_length !=3D tick_length_base. Convert it to fold time_adjust into time_offset each second (up to MAX_TICKADJ per second, same rate limit as before). The existing time_offset skew mechanism then delivers it via the per-tick ntp_error transfer and mult adjustment. Introduce ntp_set_time_offset() helper for setting time_offset from a nanosecond value, and refactor ntp_update_offset() to use it. This helper will also be used by the feed-forward reference clock API in a subsequent commit. This eliminates the last source of tick_length inflation, making tick_length always equal tick_length_base. 
Signed-off-by: David Woodhouse Assisted-by: Kiro:claude-opus-4.6-1m --- kernel/time/ntp.c | 52 ++++++++++++++++++++++++-------------- kernel/time/ntp_internal.h | 1 + 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 87f3f5d0d13d..2b75653b456c 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -329,7 +329,7 @@ static void ntp_update_offset(struct ntp_data *ntpdata,= long offset) =20 ntpdata->time_freq =3D max(freq_adj, -MAXFREQ_SCALED); =20 - ntpdata->time_offset =3D div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVA= L_FREQ); + ntp_set_time_offset(ntpdata - tk_ntp_data, offset64); } =20 static void __ntp_clear(struct ntp_data *ntpdata) @@ -390,6 +390,24 @@ s64 ntp_drain_time_offset(unsigned int tkid, s64 amoun= t) return 0; } =20 +/** + * ntp_set_time_offset - Set the NTP time offset (phase correction) + * @tkid: Timekeeper ID + * @offset_ns: Desired offset in nanoseconds + * + * Converts nanoseconds to internal time_offset units and stores it. + * Also clears time_adjust since a new offset supersedes any pending + * adjtime() slew. 
+ */ +void ntp_set_time_offset(unsigned int tkid, s64 offset_ns) +{ + struct ntp_data *ntpdata =3D &tk_ntp_data[tkid]; + + ntpdata->time_offset =3D div_s64((s64)offset_ns << NTP_SCALE_SHIFT, + NTP_INTERVAL_FREQ); + ntpdata->time_adjust =3D 0; +} + /** * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime= _t * @tkid: Timekeeper ID @@ -498,26 +516,22 @@ int second_overflow(unsigned int tkid, time64_t secs) /* Check PPS signal */ pps_dec_valid(ntpdata); =20 - if (!ntpdata->time_adjust) - goto out; - - if (ntpdata->time_adjust > MAX_TICKADJ) { - ntpdata->time_adjust -=3D MAX_TICKADJ; - ntpdata->tick_length +=3D MAX_TICKADJ_SCALED; - goto out; - } - - if (ntpdata->time_adjust < -MAX_TICKADJ) { - ntpdata->time_adjust +=3D MAX_TICKADJ; - ntpdata->tick_length -=3D MAX_TICKADJ_SCALED; - goto out; + /* + * Fold any pending time_adjust (from adjtime()) into time_offset. + * This used to inflate tick_length directly; now it uses the same + * per-tick skew mechanism as NTP's time_offset. Rate-limited to + * MAX_TICKADJ (500=C2=B5s) per second. 
+ */ + if (ntpdata->time_adjust) { + long adj =3D clamp(ntpdata->time_adjust, + (long)-MAX_TICKADJ, (long)MAX_TICKADJ); + + ntpdata->time_adjust -=3D adj; + ntpdata->time_offset +=3D div_s64( + (s64)adj * NSEC_PER_USEC << NTP_SCALE_SHIFT, + NTP_INTERVAL_FREQ); } =20 - ntpdata->tick_length +=3D (s64)(ntpdata->time_adjust * NSEC_PER_USEC / NT= P_INTERVAL_FREQ) - << NTP_SCALE_SHIFT; - ntpdata->time_adjust =3D 0; - -out: return leap; } =20 diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 05e5dd5e1b70..639860ff2baf 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -8,6 +8,7 @@ extern void ntp_clear(unsigned int tkid); extern u64 ntp_tick_length(unsigned int tkid); extern s64 ntp_get_skew_delta(unsigned int tkid); extern s64 ntp_drain_time_offset(unsigned int tkid, s64 amount); +extern void ntp_set_time_offset(unsigned int tkid, s64 offset_ns); extern ktime_t ntp_get_next_leap(unsigned int tkid); extern int second_overflow(unsigned int tkid, time64_t secs); extern int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, con= st struct timespec64 *ts, --=20 2.54.0 From nobody Sun May 24 23:30:58 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2FFE63DD52B for ; Wed, 20 May 2026 13:52:34 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285155; cv=none; b=IoJ9aXVyd/MdV4JB27Pis5SVw3605RqmpHOI1aCjUO1FVctcJTQk92q32C13gZZCTnFYdNiZoEWDcmgggzSdU+zz7y/MLOBYKcqSTugzSbWD+fn4BuNYhbUQ83g7MN1pwB6sIEaJCe+tmpg3RUkcVr/pZuesah/mOFpWP69iScw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285155; c=relaxed/simple; bh=4fX4GeabdU62Fkra4um+WSg6BDuz0vt5ajDXY9JKOw8=; 
h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=ESm3Kc6GTJFf7o46mTPGAOaKwYCgRZcOi3xGil9idAm7RV+x1d5fC42vTz/sKdNhrEY+3ZnPwpdlFa7w/0T8gz2fqzH/DEEcY8iGtbbz3/VD6aH4nfqfbiijEJpmuP4PY/+MnzeQosCNtPwER5RdlZhRDnQtscHT2hjNTEb2ItY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=desiato.srs.infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=SsPAwpoR; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=desiato.srs.infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="SsPAwpoR" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Sender:Content-Transfer-Encoding: MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From: Reply-To:Content-Type:Content-ID:Content-Description; bh=6NngXNdLEkN3FyxYgvraZUzq6HeZpiWE7IIpASU6XAI=; b=SsPAwpoRCeun+AedSxKWTcdQt3 zPfRRIFgoI1OOIqmRd+kC1AHxhGf92RRs6iAwqqgUaOgBwb9HrMxY2EoVEfk+toqihmWcQSOasobx pxdrQAbAVn0l0sbPL41oQs4wwvwTkzrCb9cxwkJJbbKHuhNJeqNlQYQyJPjJe+wOn4zOSC+s6tE3o Alm7DHX2w93XUo89A1YHvdyx8fEo6Auc0UpYSUZCMnpZhDMtn+vD1HbXl3fj9Qx3c/Fl3/5ecXls/ g5A4Lt/PMshWQe0ykH6ObwauWsxibIloohKqxxL433covTRT8fX9Gp3F+iRDfMuxWbSjBh9cj7X2a 3OMQfvaQ==; Received: from i7.infradead.org ([2001:8b0:10b:1:21e:67ff:fecb:7a92]) by desiato.infradead.org with esmtpsa (Exim 4.99.1 #2 (Red Hat Linux)) id 1wPhLF-0000000Gz9z-3XYc; Wed, 20 May 2026 13:52:10 +0000 Received: from dwoodhou by i7.infradead.org with local (Exim 4.99.2 #2 (Red Hat Linux)) id 1wPhLF-000000009tZ-3RMM; Wed, 20 May 2026 14:52:09 +0100 From: David Woodhouse To: Richard Cochran , Wen Gu , David Woodhouse , Andrew Lunn , "David S. 
Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , John Stultz , Thomas Gleixner , Stephen Boyd , Anna-Maria Behnsen , Frederic Weisbecker , Shuah Khan , Peter Zijlstra , =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= , Arnd Bergmann , Miroslav Lichvar , Julien Ridoux , Ryan Luu , linux-kernel@vger.kernel.org Cc: David Woodhouse Subject: [RFC PATCH v3 07/10] ntp: Remove tick_length_base, use tick_length directly Date: Wed, 20 May 2026 14:33:46 +0100 Message-ID: <20260520135207.37826-8-dwmw2@infradead.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260520135207.37826-1-dwmw2@infradead.org> References: <20260520135207.37826-1-dwmw2@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Sender: David Woodhouse X-SRS-Rewrite: SMTP reverse-path rewritten from by desiato.infradead.org. See http://www.infradead.org/rpr.html Content-Type: text/plain; charset="utf-8" From: David Woodhouse Now that nothing inflates tick_length beyond tick_length_base (the adjtime path was converted to use time_offset in the previous commit), the two fields are always equal. Remove tick_length_base and keep tick_length as the single field. Remove the per-second reset and the delta update in ntp_update_frequency() since there is no separate base to track. No functional change intended. 
Signed-off-by: David Woodhouse Assisted-by: Kiro:claude-opus-4.6-1m --- kernel/time/ntp.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 2b75653b456c..4494f258dd86 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -26,8 +26,7 @@ /** * struct ntp_data - Structure holding all NTP related state * @tick_usec: USER_HZ period in microseconds - * @tick_length: Adjusted tick length - * @tick_length_base: Base value for @tick_length + * @tick_length: Tick length in ns << NTP_SCALE_SHIFT * @time_state: State of the clock synchronization * @time_status: Clock status bits * @time_offset: Time adjustment in nanoseconds @@ -59,7 +58,6 @@ struct ntp_data { unsigned long tick_usec; u64 tick_length; - u64 tick_length_base; int time_state; int time_status; s64 time_offset; @@ -246,8 +244,7 @@ static inline void pps_fill_timex(struct ntp_data *ntpd= ata, struct __kernel_time #endif /* CONFIG_NTP_PPS */ =20 /* - * Update tick_length and tick_length_base, based on tick_usec, ntp_tick_a= dj and - * time_freq: + * Update tick_length based on tick_usec, ntp_tick_adj and time_freq: */ static void ntp_update_frequency(struct ntp_data *ntpdata) { @@ -264,8 +261,7 @@ static void ntp_update_frequency(struct ntp_data *ntpda= ta) * Don't wait for the next second_overflow, apply the change to the * tick length immediately: */ - ntpdata->tick_length +=3D new_base - ntpdata->tick_length_base; - ntpdata->tick_length_base =3D new_base; + ntpdata->tick_length =3D new_base; } =20 static inline s64 ntp_update_offset_fll(struct ntp_data *ntpdata, s64 offs= et64, long secs) @@ -342,7 +338,6 @@ static void __ntp_clear(struct ntp_data *ntpdata) =20 ntp_update_frequency(ntpdata); =20 - ntpdata->tick_length =3D ntpdata->tick_length_base; ntpdata->time_offset =3D 0; =20 ntpdata->ntp_next_leap_sec =3D TIME64_MAX; @@ -502,7 +497,6 @@ int second_overflow(unsigned int tkid, time64_t secs) } =20 /* Compute the phase adjustment for 
the next second */ - ntpdata->tick_length =3D ntpdata->tick_length_base; =20 /* * Set the per-tick skew rate for the tick code. This is in the --=20 2.54.0 From nobody Sun May 24 23:30:58 2026 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 881C83DE439 for ; Wed, 20 May 2026 13:52:25 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285149; cv=none; b=lvdAO1cjmvBw+T954Sb8s3xWuUEffNkdTfXsKYGTgbwic0z3q+e11XtjGSx3MZyqlkYhOP7KgJXr4lQxsdmp3BCDRLcmrpKh49n4tLWajdF0WERWRkMRsjjSMlfb8jqhe57BCauSC6YmSVqR+6Nx0YllovKwNKoanmM20cr42VI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285149; c=relaxed/simple; bh=01M4v7SHQfxs4U8XWCvW8MbkR1ugtScnAXcwU+JW0Ys=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=bmV6GFAllPjXRrdocTgt3HQoKGfcIzTKdFQlpgbt6lA05gPV6c+1JFh874BHjKw8xXwnNtIVx5cofDbWH4Gmiw82lGLt7O8w2xB5/dNl+4bvob3G4LPDS2NyEl736pV4MfmiLSLQezktc4Biah9xI6YuGukw5JmTAQxw929PGFA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=casper.srs.infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=ljgClzro; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=casper.srs.infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="ljgClzro" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; 
h=Sender:Content-Transfer-Encoding: MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From: Reply-To:Content-Type:Content-ID:Content-Description; bh=38iGrtb0lKcMY13D9sVP6SlOc0cqHJoLi18Pl08yD6M=; b=ljgClzro762kisie4L2//sgajX cM3wTtV0ebTfEiwk6Tv128Hz7jMlMk5tlJbP02YP7AgOcGYjAgcodElrJR8GCll37ze1EvN7b7E5s E3meAAfYn89nrzI68uBg1yjLcbN242dPjposzbZPLaVLxP0hCnlC8sygOiF6/QgY6JLpWXsR6d8Cb 51hak9NBotxlICHkA5CTaiJkidyrwGinExGtlYw26dWO8z+4MUV4wmhubCmYCrSkl8nbFHypFuthc 0yWJQtF+3fFlsA6rabhIEYsaDhDzrJZsRdR+EqQyl69LIYsZ6AsvyEiFGQhOAMtQ4eLU8Mkair+XT 8ncpAL3A==; Received: from i7.infradead.org ([2001:8b0:10b:1:21e:67ff:fecb:7a92]) by casper.infradead.org with esmtpsa (Exim 4.99.1 #2 (Red Hat Linux)) id 1wPhLG-00000007DRq-0esX; Wed, 20 May 2026 13:52:10 +0000 Received: from dwoodhou by i7.infradead.org with local (Exim 4.99.2 #2 (Red Hat Linux)) id 1wPhLF-000000009tc-3lko; Wed, 20 May 2026 14:52:09 +0100 From: David Woodhouse To: Richard Cochran , Wen Gu , David Woodhouse , Andrew Lunn , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , John Stultz , Thomas Gleixner , Stephen Boyd , Anna-Maria Behnsen , Frederic Weisbecker , Shuah Khan , Peter Zijlstra , =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= , Arnd Bergmann , Miroslav Lichvar , Julien Ridoux , Ryan Luu , linux-kernel@vger.kernel.org Cc: David Woodhouse Subject: [RFC PATCH v3 08/10] timekeeping: Add absolute reference for feed-forward clock discipline Date: Wed, 20 May 2026 14:33:47 +0100 Message-ID: <20260520135207.37826-9-dwmw2@infradead.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260520135207.37826-1-dwmw2@infradead.org> References: <20260520135207.37826-1-dwmw2@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Sender: David Woodhouse X-SRS-Rewrite: SMTP reverse-path rewritten from by casper.infradead.org. 
See http://www.infradead.org/rpr.html Content-Type: text/plain; charset="utf-8" From: David Woodhouse Add timekeeping_set_reference() which allows an external clock source (such as a hypervisor vmclock) to provide an absolute time reference. The reference defines a linear counter-to-time mapping that the kernel uses to set both the frequency and phase of the system clock. When timekeeping_set_reference() is called: - tick_length is computed from the reference period and set via ntp_set_tick_length() - the phase delta is set via ntp_set_time_offset() So the NTP state is entirely consistent, and the existing time_offset skew mechanism then converges the clock to the reference, with the ntp_error and time_offset accumulators staying accurate throughout. Signed-off-by: David Woodhouse Assisted-by: Kiro:claude-opus-4.6-1m --- include/linux/timekeeping_reference.h | 19 ++++++++++++++ kernel/time/ntp.c | 14 ++++++++++ kernel/time/ntp_internal.h | 1 + kernel/time/timekeeping.c | 38 +++++++++++++++++++++++++++ 4 files changed, 72 insertions(+) create mode 100644 include/linux/timekeeping_reference.h diff --git a/include/linux/timekeeping_reference.h b/include/linux/timekeep= ing_reference.h new file mode 100644 index 000000000000..4c1d8a6c02f1 --- /dev/null +++ b/include/linux/timekeeping_reference.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TIMEKEEPING_REFERENCE_H +#define _LINUX_TIMEKEEPING_REFERENCE_H + +#include +#include + +struct tk_reference { + enum clocksource_ids cs_id; + u64 counter_value; + u64 time_sec; + u64 time_frac_sec; + u64 period_frac_sec; + u8 period_shift; +}; + +int timekeeping_set_reference(const struct tk_reference *ref); + +#endif diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 4494f258dd86..3dc098695665 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -403,6 +403,20 @@ void ntp_set_time_offset(unsigned int tkid, s64 offset= _ns) ntpdata->time_adjust =3D 0; } =20 +void ntp_set_tick_length(unsigned 
int tkid, u64 tick_length) +{ + struct ntp_data *ntpdata =3D &tk_ntp_data[tkid]; + u64 base; + + /* Compute the nominal second length (without frequency adjustment) */ + base =3D (u64)(ntpdata->tick_usec * NSEC_PER_USEC * USER_HZ) + << NTP_SCALE_SHIFT; + base +=3D ntpdata->ntp_tick_adj; + + ntpdata->time_freq =3D (s64)(tick_length * NTP_INTERVAL_FREQ - base); + ntp_update_frequency(ntpdata); +} + /** * ntp_get_next_leap - Returns the next leapsecond in CLOCK_REALTIME ktime= _t * @tkid: Timekeeper ID diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 639860ff2baf..14ca8bc08120 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -9,6 +9,7 @@ extern u64 ntp_tick_length(unsigned int tkid); extern s64 ntp_get_skew_delta(unsigned int tkid); extern s64 ntp_drain_time_offset(unsigned int tkid, s64 amount); extern void ntp_set_time_offset(unsigned int tkid, s64 offset_ns); +extern void ntp_set_tick_length(unsigned int tkid, u64 tick_length); extern ktime_t ntp_get_next_leap(unsigned int tkid); extern int second_overflow(unsigned int tkid, time64_t secs); extern int ntp_adjtimex(unsigned int tkid, struct __kernel_timex *txc, con= st struct timespec64 *ts, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 27b2a093b138..5c4b377505bc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2324,6 +2324,44 @@ static __always_inline void timekeeping_apply_adjust= ment(struct timekeeper *tk, * Adjust the timekeeper's multiplier to the correct frequency * and also to reduce the accumulated error value. 
*/ + +#include + + +int timekeeping_set_reference(const struct tk_reference *ref) +{ + struct timekeeper *tk =3D &tk_core.timekeeper; + u64 new_tl, delta, ref_frac; + s64 ref_err; + unsigned long flags; + + raw_spin_lock_irqsave(&tk_core.lock, flags); + + if (tk->cs_id !=3D ref->cs_id) { + raw_spin_unlock_irqrestore(&tk_core.lock, flags); + return -ENODEV; + } + + new_tl =3D mul_u64_u64_shr(ref->period_frac_sec, + (u64)tk->cycle_interval * NSEC_PER_SEC, + 32 + ref->period_shift); + ntp_set_tick_length(tk->id, new_tl); + + /* Compute phase offset at cycle_last and set time_offset to slew */ + delta =3D tk->tkr_mono.cycle_last - ref->counter_value; + ref_frac =3D mul_u64_u64_shr(delta, ref->period_frac_sec, + ref->period_shift) + ref->time_frac_sec; + ref_err =3D (s64)mul_u64_u64_shr(ref_frac, + (u64)NSEC_PER_SEC << tk->tkr_mono.shift, 64) - + (s64)tk->tkr_mono.xtime_nsec; + ntp_set_time_offset(tk->id, ref_err >> tk->tkr_mono.shift); + tk->ntp_error =3D 0; + + raw_spin_unlock_irqrestore(&tk_core.lock, flags); + return 0; +} +EXPORT_SYMBOL_GPL(timekeeping_set_reference); + static void timekeeping_adjust(struct timekeeper *tk, s64 offset) { u64 ntp_tl =3D ntp_tick_length(tk->id); --=20 2.54.0 From nobody Sun May 24 23:30:58 2026 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DF004344DA4 for ; Wed, 20 May 2026 13:52:25 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285149; cv=none; b=WNLeND2JrzmoHdRfY8vvDzVikoW2gtJBkVaxIEEjTZC/ILuDFJ00tyTlFM5c2NQXH4uSiFTn4oSJTPXiAKh9NMDnt4wEkEC3jIkgkLZuvVC043A08+cNDdipL6+O6cp/vXdBz7Ae7JOdQu3y5dnLCQLYaT6jCBr0KGBHge9XejA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285149; c=relaxed/simple; 
bh=046NnFbkyTEvULHJ97BkOmyChiLKzCSTrwA7zwV9qkM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=hGDLFACUPh1KnP5eKxQcxMq4mQqEjPw908y+0vneEEKdIFb1iR8aLUShfasIRBWzU/HIcgA50McSDQgblG17DEtoxzeFv+wHVYYduZh5H6Ws9VzsFPGo6gtvQ2fsFw47c9oydQR4XaLeaVHgMvbDHVwM5Q1oj80jnOxzcXZTzao= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=casper.srs.infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=PVzuAzIM; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=casper.srs.infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="PVzuAzIM" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; h=Sender:Content-Transfer-Encoding: MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc:To:From: Reply-To:Content-Type:Content-ID:Content-Description; bh=VekwbJCxSRNw7utblzdeWiHzUNmgZKpPMgMUJvGikDE=; b=PVzuAzIMD5IlS6EQMYr4+58K5p U0fnrLcfYXB6jXH/UCaXsedT0Pkty8xbH0dZE1zpZDSwjl4a29TaPpKMUbuzZM7GOxSh1ypRA3QfG 0AjMpPixq4mtoEDmNZGw7tqA3k/uRVDtXrImUpK4YmAfd/raxtAoc/hVZGf38AGp+++QgqrsdSlpk DUwOSoj8pgefiAl5xYYDY14vHDzrltdomSpUi1u3hbZts2SwQBa78ukhAtqu4S1CdI3beSV7s9/DS T53/hwgznkYoMvPjly60fhP1mmXIpKc6hf4WhnrE7rJ06DriUmwFKYYiuVqR1mTOMcMHfMJ9Cxr6V 2rA2dIfw==; Received: from i7.infradead.org ([2001:8b0:10b:1:21e:67ff:fecb:7a92]) by casper.infradead.org with esmtpsa (Exim 4.99.1 #2 (Red Hat Linux)) id 1wPhLG-00000007DRr-0o34; Wed, 20 May 2026 13:52:10 +0000 Received: from dwoodhou by i7.infradead.org with local (Exim 4.99.2 #2 (Red Hat Linux)) id 1wPhLF-000000009tn-45xO; Wed, 20 May 2026 14:52:09 +0100 From: David Woodhouse To: Richard Cochran , Wen Gu 
, David Woodhouse , Andrew Lunn , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , John Stultz , Thomas Gleixner , Stephen Boyd , Anna-Maria Behnsen , Frederic Weisbecker , Shuah Khan , Peter Zijlstra , =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= , Arnd Bergmann , Miroslav Lichvar , Julien Ridoux , Ryan Luu , linux-kernel@vger.kernel.org Cc: David Woodhouse Subject: [RFC PATCH v3 09/10] ptp_vmclock: Feed reference to timekeeping for feed-forward discipline Date: Wed, 20 May 2026 14:33:48 +0100 Message-ID: <20260520135207.37826-10-dwmw2@infradead.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260520135207.37826-1-dwmw2@infradead.org> References: <20260520135207.37826-1-dwmw2@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Sender: David Woodhouse X-SRS-Rewrite: SMTP reverse-path rewritten from by casper.infradead.org. See http://www.infradead.org/rpr.html Content-Type: text/plain; charset="utf-8" From: David Woodhouse When a vmclock device provides valid time, call timekeeping_set_reference() to enable feed-forward clock discipline. This eliminates drift between the system clock and the vmclock reference. The reference is set at probe time (after PTP registration) and updated on each notification from the hypervisor (ACPI or DT interrupt). If cycle_interval is not provided (set to 0), timekeeping_set_reference() fills it from the current timekeeper. 
Signed-off-by: David Woodhouse Assisted-by: Kiro:claude-opus-4.6-1m --- drivers/ptp/ptp_vmclock.c | 95 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 8b630eb916b5..abb8f821bcaa 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -27,6 +27,7 @@ #include =20 #include +#include =20 #ifdef CONFIG_X86 #include @@ -48,6 +49,7 @@ struct vmclock_state { wait_queue_head_t disrupt_wait; struct ptp_clock_info ptp_clock_info; struct ptp_clock *ptp_clock; + struct timer_list cmp_timer; enum clocksource_ids cs_id, sys_cs_id; int index; char *name; @@ -334,6 +336,92 @@ static const struct ptp_clock_info ptp_vmclock_info = =3D { .getcrosststamp =3D ptp_vmclock_getcrosststamp, }; =20 +static void vmclock_cmp_timer_fn(struct timer_list *t) +{ + struct vmclock_state *st =3D container_of(t, struct vmclock_state, cmp_ti= mer); + struct vmclock_abi *clk =3D st->clk; + struct system_time_snapshot snap; + unsigned __int128 product; + u64 delta, ref_frac, ref_ns, sys_ns; + s64 diff; + u32 seq; + + do { + seq =3D le32_to_cpu(READ_ONCE(clk->seq_count)); + if (seq & 1) + goto rearm; + /* Pairs with the smp_wmb() in the vmclock page writer */ + smp_rmb(); + + ktime_get_snapshot(&snap); + if (snap.cs_id !=3D st->cs_id) + goto rearm; + + delta =3D snap.cycles - le64_to_cpu(clk->counter_value); + product =3D (unsigned __int128)delta * + le64_to_cpu(clk->counter_period_frac_sec); + product >>=3D clk->counter_period_shift; + product +=3D le64_to_cpu(clk->time_frac_sec); + ref_frac =3D (u64)product; + ref_ns =3D mul_u64_u64_shr(ref_frac, NSEC_PER_SEC, 64); + ref_ns +=3D (le64_to_cpu(clk->time_sec) + + (u64)(product >> 64)) * NSEC_PER_SEC; + /* Pairs with the smp_wmb() in the vmclock page writer */ + smp_rmb(); + if (seq !=3D le32_to_cpu(READ_ONCE(clk->seq_count))) + goto rearm; + } while (0); + + sys_ns =3D ktime_to_ns(snap.real) - + (s64)(int16_t)le16_to_cpu(clk->tai_offset_sec) 
* NSEC_PER_SEC; + diff =3D (s64)(ref_ns - sys_ns); + pr_info("vmclock_cmp: diff=3D%lldns tsc=3D%llx\n", diff, snap.cycles); + +rearm: + mod_timer(&st->cmp_timer, jiffies + msecs_to_jiffies(500)); +} + +static void vmclock_set_tk_reference(struct vmclock_state *st) +{ + struct vmclock_abi *clk =3D st->clk; + struct tk_reference ref =3D { + .cs_id =3D st->cs_id, + .counter_value =3D le64_to_cpu(clk->counter_value), + .time_sec =3D le64_to_cpu(clk->time_sec), + .time_frac_sec =3D le64_to_cpu(clk->time_frac_sec), + .period_frac_sec =3D le64_to_cpu(clk->counter_period_frac_sec), + .period_shift =3D clk->counter_period_shift, + }; + + /* Convert TAI to UTC for comparison with xtime_sec */ + if (clk->time_type =3D=3D VMCLOCK_TIME_TAI && + (le64_to_cpu(clk->flags) & VMCLOCK_FLAG_TAI_OFFSET_VALID)) + ref.time_sec +=3D (int16_t)le16_to_cpu(clk->tai_offset_sec); + + if (clk->clock_status !=3D VMCLOCK_STATUS_UNRELIABLE) { + /* Step clock if far from reference */ + struct timespec64 now, vmtime; + unsigned __int128 product; + u64 cycles =3D get_cycles(); + u64 delta_cycles =3D cycles - ref.counter_value; + s64 delta_ns; + + product =3D (unsigned __int128)delta_cycles * ref.period_frac_sec; + product >>=3D ref.period_shift; + product +=3D ref.time_frac_sec; + vmtime.tv_sec =3D ref.time_sec + (u64)(product >> 64); + vmtime.tv_nsec =3D mul_u64_u64_shr((u64)product, + NSEC_PER_SEC, 64); + + ktime_get_real_ts64(&now); + delta_ns =3D timespec64_to_ns(&vmtime) - timespec64_to_ns(&now); + if (delta_ns > 100000000 || delta_ns < -100000000) + do_settimeofday64(&vmtime); + + timekeeping_set_reference(&ref); + } +} + static struct ptp_clock *vmclock_ptp_register(struct device *dev, struct vmclock_state *st) { @@ -525,6 +613,7 @@ vmclock_acpi_notification_handler(acpi_handle __always_= unused handle, struct device *device =3D dev; struct vmclock_state *st =3D device->driver_data; =20 + vmclock_set_tk_reference(st); wake_up_interruptible(&st->disrupt_wait); } =20 @@ -580,6 +669,7 @@ static 
irqreturn_t vmclock_of_irq_handler(int __always_= unused irq, void *_st) { struct vmclock_state *st =3D _st; =20 + vmclock_set_tk_reference(st); wake_up_interruptible(&st->disrupt_wait); return IRQ_HANDLED; } @@ -751,8 +841,13 @@ static int vmclock_probe(struct platform_device *pdev) st->ptp_clock =3D NULL; return ret; } + if (st->ptp_clock) + vmclock_set_tk_reference(st); } =20 + timer_setup(&st->cmp_timer, vmclock_cmp_timer_fn, 0); + mod_timer(&st->cmp_timer, jiffies + msecs_to_jiffies(500)); + if (!st->miscdev.minor && !st->ptp_clock) { /* Neither miscdev nor PTP registered */ dev_info(dev, "vmclock: Neither miscdev nor PTP available; not registeri= ng\n"); --=20 2.54.0 From nobody Sun May 24 23:30:58 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AC64D3E2AC8 for ; Wed, 20 May 2026 13:52:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285155; cv=none; b=NEjFeYYupSF3UlRn9vJmMBzrfOJ8hB6X+TYcLz2u5hhKPi/ovbgoa5F08EdsFxNLIQbFbgAj1DJs14I4LC3oeSG3uDzakZR4Y/vmK932qPfT0mewLSgNzTPz6cD2YNCBfQbWrbH1SFFRhJa0RyhmjArpypim1iCG+ZZoxBUnU0M= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779285155; c=relaxed/simple; bh=umKqYiXbE2g25S2v0xnywt7nxNBqOEe5J76CcB1xsUc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=G3eoOASOFpBfMX/IPKTmzKkBCnY9zboXtVbKk/h2DCDj7bY4Gn73EQSwvwc7a+UqnycE+xXOLDVymvOf4fPN0lDKo8ExTk2n/quNUVv1MtXD9VZJXSurBlhZcNkQDTQRWdt+2im0ojnjrpBODyMnYqgoCipu+goxMceU9dO5GpI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=desiato.srs.infradead.org; dkim=pass (2048-bit key) 
header.d=infradead.org header.i=@infradead.org header.b=VppAigPT; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=desiato.srs.infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="VppAigPT" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Sender:Content-Transfer-Encoding: Content-Type:MIME-Version:References:In-Reply-To:Message-ID:Date:Subject:Cc: To:From:Reply-To:Content-ID:Content-Description; bh=i/oW+npvkBwgf3degeq3unSjsisk8w31FduzydsBNOE=; b=VppAigPT1lkUTRAmhfGhlBoqWD Te5dG6E/Id2hqjVFQjlgKNvfXNhQadkUqcFIzmyPlpfIUT2V8W601le/MwoLfJKRDdGhmOMMe/lzQ 5mpJ1ytsgl3HdsI/HHiQfb1InfP4woXDnxBDxkTT+fZvVErHY//Zi76ucRiT/RiwSo1HanfNHzcZk K0g4DP1RUwtToS8me8WEqrhYNKCXyIOa3uxDV1EW1XxVC8f8W1le+KpuKkQeS/6/M2d+s+aadperq Ig8hkemJzGiynMhvMij/HUEDjn/ODupNVfrzKoRuB2+u0I59VRiPEI1hBAIGmetLOPBNQ+P66Tdj2 bpvcV6Ug==; Received: from i7.infradead.org ([2001:8b0:10b:1:21e:67ff:fecb:7a92]) by desiato.infradead.org with esmtpsa (Exim 4.99.1 #2 (Red Hat Linux)) id 1wPhLF-0000000GzA3-3bto; Wed, 20 May 2026 13:52:10 +0000 Received: from dwoodhou by i7.infradead.org with local (Exim 4.99.2 #2 (Red Hat Linux)) id 1wPhLG-000000009tr-04TJ; Wed, 20 May 2026 14:52:10 +0100 From: David Woodhouse To: Richard Cochran , Wen Gu , David Woodhouse , Andrew Lunn , "David S. 
Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , John Stultz , Thomas Gleixner , Stephen Boyd , Anna-Maria Behnsen , Frederic Weisbecker , Shuah Khan , Peter Zijlstra , =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= , Arnd Bergmann , Miroslav Lichvar , Julien Ridoux , Ryan Luu , linux-kernel@vger.kernel.org Cc: David Woodhouse Subject: [RFC PATCH v3 10/10] kernel/time: Add /dev/vmclock_host miscdev Date: Wed, 20 May 2026 14:33:49 +0100 Message-ID: <20260520135207.37826-11-dwmw2@infradead.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260520135207.37826-1-dwmw2@infradead.org> References: <20260520135207.37826-1-dwmw2@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Sender: David Woodhouse X-SRS-Rewrite: SMTP reverse-path rewritten from by desiato.infradead.org. See http://www.infradead.org/rpr.html From: David Woodhouse Expose the host's NTP-disciplined clock as a vmclock_abi page via /dev/vmclock_host. A VMM can mmap or poll() this device to obtain precision time parameters for relaying to guests. The page is updated via the pvclock_gtod notifier chain when the NTP frequency or skew rate changes. The period computation is redone on frequency changes; time tuple updates are cheap. The phase offset (time_offset) is computed under tk_core.lock in timekeeping_set_reference(), matching the locking used by do_adjtimex/hardpps. 
Fields populated: - counter_id: X86_TSC (or ARM_VCNT) - time_type: TAI (if tai_offset known) or UTC - counter_value: TSC at reference point - time_sec/time_frac_sec: time at reference point - counter_period_frac_sec: NTP-disciplined TSC period - tai_offset_sec: current UTC-TAI offset (if known) - clock_status: SYNCHRONIZED / FREERUNNING / UNKNOWN - leap_indicator: from NTP time_state Signed-off-by: David Woodhouse Assisted-by: Kiro:claude-opus-4.6-1m --- include/linux/timekeeper_internal.h | 2 + kernel/time/Kconfig | 7 + kernel/time/Makefile | 1 + kernel/time/ntp.c | 19 + kernel/time/ntp_internal.h | 5 + kernel/time/timekeeping.c | 2 + kernel/time/vmclock_host.c | 391 ++++++++++++++++++ .../selftests/timers/vmclock_host_test.c | 171 ++++++++ 8 files changed, 598 insertions(+) create mode 100644 kernel/time/vmclock_host.c create mode 100644 tools/testing/selftests/timers/vmclock_host_test.c diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper= _internal.h index 9de6b5b94dc0..c3d6f17e0623 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -185,6 +185,8 @@ struct timekeeper { u32 ntp_err_mult; u32 skip_second_overflow; s64 skew_delta; + int ntp_status; + int ntp_time_state; s32 tai_offset; }; =20 diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 02aac7c5aa76..f0cddfec5751 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -221,4 +221,11 @@ config POSIX_AUX_CLOCKS and other clock domains, which are not correlated to the TAI/NTP notion of time. =20 +config VMCLOCK_HOST + tristate "VMClock host time provider (/dev/vmclock_host)" + depends on X86_TSC || ARM64 + help + Expose the host NTP-disciplined clock as a vmclock page via + /dev/vmclock_host for VMMs to relay precision time to guests. 
+ endmenu diff --git a/kernel/time/Makefile b/kernel/time/Makefile index eaf290c972f9..549070254e3a 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -33,3 +33,4 @@ obj-$(CONFIG_TIME_NS) +=3D namespace.o obj-$(CONFIG_TIME_NS_VDSO) +=3D namespace_vdso.o obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) +=3D clocksource-wdtest.o obj-$(CONFIG_TIME_KUNIT_TEST) +=3D time_test.o +obj-$(CONFIG_VMCLOCK_HOST) +=3D vmclock_host.o diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 3dc098695665..2866d4208117 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -365,6 +365,13 @@ s64 ntp_get_skew_delta(unsigned int tkid) return tk_ntp_data[tkid].skew_delta; } =20 +s64 ntp_get_time_offset_ns(unsigned int tkid) +{ + return shift_right(tk_ntp_data[tkid].time_offset * NTP_INTERVAL_FREQ, + NTP_SCALE_SHIFT); +} +EXPORT_SYMBOL_GPL(ntp_get_time_offset_ns); + s64 ntp_drain_time_offset(unsigned int tkid, s64 amount) { struct ntp_data *ntpdata =3D &tk_ntp_data[tkid]; @@ -669,6 +676,18 @@ static inline bool ntp_synced(void) return !(tk_ntp_data[TIMEKEEPER_CORE].time_status & STA_UNSYNC); } =20 +int ntp_get_status(void) +{ + return tk_ntp_data[TIMEKEEPER_CORE].time_status; +} +EXPORT_SYMBOL_GPL(ntp_get_status); + +int ntp_get_time_state(void) +{ + return tk_ntp_data[TIMEKEEPER_CORE].time_state; +} +EXPORT_SYMBOL_GPL(ntp_get_time_state); + /* * If we have an externally synchronized Linux clock, then update RTC clock * accordingly every ~11 minutes. Generally RTCs can only store second diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 14ca8bc08120..ba1d14bbcf0e 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -2,13 +2,18 @@ #ifndef _LINUX_NTP_INTERNAL_H #define _LINUX_NTP_INTERNAL_H =20 +struct audit_ntp_data; + extern void ntp_init(void); +extern int ntp_get_status(void); +extern int ntp_get_time_state(void); extern void ntp_clear(unsigned int tkid); /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. 
*/ extern u64 ntp_tick_length(unsigned int tkid); extern s64 ntp_get_skew_delta(unsigned int tkid); extern s64 ntp_drain_time_offset(unsigned int tkid, s64 amount); extern void ntp_set_time_offset(unsigned int tkid, s64 offset_ns); +extern s64 ntp_get_time_offset_ns(unsigned int tkid); extern void ntp_set_tick_length(unsigned int tkid, u64 tick_length); extern ktime_t ntp_get_next_leap(unsigned int tkid); extern int second_overflow(unsigned int tkid, time64_t secs); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5c4b377505bc..b93fab0890df 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2380,6 +2380,8 @@ static void timekeeping_adjust(struct timekeeper *tk,= s64 offset) return; tk->ntp_tick =3D ntp_tl; tk->skew_delta =3D skew; + tk->ntp_status =3D ntp_get_status(); + tk->ntp_time_state =3D ntp_get_time_state(); mult =3D div64_u64((tk->ntp_tick + skew) >> tk->ntp_error_shift, tk->cycle_interval); } diff --git a/kernel/time/vmclock_host.c b/kernel/time/vmclock_host.c new file mode 100644 index 000000000000..d43f2b043fb9 --- /dev/null +++ b/kernel/time/vmclock_host.c @@ -0,0 +1,391 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * /dev/vmclock_host - Expose host NTP-disciplined time as a vmclock page. + * + * This provides a vmclock_abi structure populated from the host's + * CLOCK_REALTIME (TAI), allowing a VMM to efficiently relay precision + * time to guests without per-tick overhead. + * + * The page is updated only when the NTP frequency (ntp_tick) changes + * or the clocksource changes =E2=80=94 not on every timekeeping tick. + * Userspace can poll() for changes. + * + * Copyright =C2=A9 2026 Amazon.com, Inc. or its affiliates. 
+ */ + +#include +#include +#include +#include "ntp_internal.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +static struct vmclock_abi *vmclock_page; +static DECLARE_WAIT_QUEUE_HEAD(vmclock_wait); +static u64 cached_ntp_tick; +static s64 cached_skew_delta; +static u64 cached_period_frac; +static u8 cached_period_shift; + +/* + * Compute counter_period_frac_sec from ntp_tick and cycle_interval. + * + * ntp_tick is ns_per_tick << 32. + * cycle_interval is counter cycles per tick. + * + * vmclock wants: period =3D frac_sec / 2^(64 + shift) in seconds. + * + * ns_per_cycle =3D ntp_tick / cycle_interval (in <<32 fixed point) + * + * period =3D ntp_tick / (cycle_interval * 10^9 * 2^32) seconds/cycle + * frac_sec =3D ntp_tick * 2^(32+shift) / (cycle_interval * 10^9) + * + * Use div64_u64 with maximum pre-shift for precision. + * The key: do TWO divisions to get 64 bits of quotient. + */ +static void vmclock_compute_period(struct timekeeper *tk, + u64 *period_frac, u8 *period_shift) +{ + u64 ntp_tick =3D tk->ntp_tick; + u64 cycle_interval =3D tk->cycle_interval; + u64 divisor =3D cycle_interval * 1000000000ULL; + int headroom =3D __builtin_clzll(ntp_tick); + u64 rem, result; + int bits_so_far; + + /* + * Compute ntp_tick * 2^(headroom + N) / divisor with 64 bits + * of precision, using iterative 32-bit chunk divisions. 
+ * + * First division: ntp_tick << headroom / divisor + */ + result =3D div64_u64_rem(ntp_tick << headroom, divisor, &rem); + bits_so_far =3D 64 - __builtin_clzll(result ?: 1); + + /* Fill remaining bits 32 at a time from the remainder */ + while (bits_so_far < 64 && rem) { + int chunk =3D min(32, 64 - bits_so_far); + int rem_headroom =3D __builtin_clzll(rem); + u64 extra; + + if (rem_headroom < chunk) + chunk =3D rem_headroom; + + extra =3D div64_u64_rem(rem << chunk, divisor, &rem); + result =3D (result << chunk) | extra; + bits_so_far +=3D chunk; + headroom +=3D chunk; + } + + /* Pad with zeros if we ran out of remainder */ + if (bits_so_far < 64) { + result <<=3D (64 - bits_so_far); + headroom +=3D (64 - bits_so_far); + } + + /* + * result =3D ntp_tick * 2^headroom / divisor + * =3D (ntp_tick / (cycle_interval * 10^9)) * 2^headroom + * =3D period_seconds * 2^32 * 2^headroom + * =3D period_seconds * 2^(32 + headroom) + * + * vmclock: frac_sec / 2^(64 + shift) =3D period_seconds + * So: shift =3D 32 + headroom - 64 =3D headroom - 32 + */ + *period_frac =3D result; + *period_shift =3D (u8)(headroom - 32); +} + + +static u8 vmclock_counter_id(struct timekeeper *tk) +{ + enum clocksource_ids id =3D tk->cs_id; + + if (IS_ENABLED(CONFIG_X86) && id =3D=3D CSID_X86_TSC) + return VMCLOCK_COUNTER_X86_TSC; + if (IS_ENABLED(CONFIG_ARM64) && id =3D=3D CSID_ARM_ARCH_COUNTER) + return VMCLOCK_COUNTER_ARM_VCNT; + return VMCLOCK_COUNTER_INVALID; +} + +/* + * Called from pvclock_gtod_notify on every timekeeping update. + * Only does real work when ntp_tick or skew_delta changes. 
+ */ +static int vmclock_host_notify(struct notifier_block *nb, + unsigned long was_set, void *data) +{ + struct timekeeper *tk =3D data; + struct vmclock_abi *clk =3D vmclock_page; + bool period_changed =3D false; + u8 counter_id; + s64 ns, sec; + u64 hi, rem, counter_value, time_frac; + __le64 le_time_sec, le_time_frac, le_counter_value; + __le64 le_period_frac; + u8 period_shift, clock_status; + + if (!clk) + return NOTIFY_DONE; + + /* Early exit if nothing relevant changed */ + if (clk->clock_status !=3D VMCLOCK_STATUS_UNKNOWN && + tk->ntp_tick =3D=3D cached_ntp_tick && + tk->skew_delta =3D=3D cached_skew_delta && !was_set) + return NOTIFY_DONE; + + counter_id =3D vmclock_counter_id(tk); + if (counter_id =3D=3D VMCLOCK_COUNTER_INVALID) { + /* Invalidate the page if clocksource isn't usable */ + WRITE_ONCE(clk->seq_count, cpu_to_le32( + le32_to_cpu(READ_ONCE(clk->seq_count)) + 1)); + smp_wmb(); + clk->counter_id =3D VMCLOCK_COUNTER_INVALID; + clk->clock_status =3D VMCLOCK_STATUS_UNKNOWN; + smp_wmb(); + WRITE_ONCE(clk->seq_count, cpu_to_le32( + le32_to_cpu(READ_ONCE(clk->seq_count)) + 1)); + return NOTIFY_DONE; + } + + /* Recompute period only when frequency changes */ + if (tk->ntp_tick !=3D cached_ntp_tick) { + vmclock_compute_period(tk, &cached_period_frac, + &cached_period_shift); + cached_ntp_tick =3D tk->ntp_tick; + period_changed =3D true; + } + cached_skew_delta =3D tk->skew_delta; + + /* Compute time tuple: C =3D A + ntp_error + time_offset */ + ns =3D tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + sec =3D tk->xtime_sec + tk->tai_offset; + ns +=3D tk->ntp_error >> (tk->tkr_mono.shift + tk->ntp_error_shift); + ns +=3D ntp_get_time_offset_ns(tk->id); + + while (ns < 0) { + ns +=3D NSEC_PER_SEC; + sec--; + } + while (ns >=3D NSEC_PER_SEC) { + ns -=3D NSEC_PER_SEC; + sec++; + } + + counter_value =3D tk->tkr_mono.cycle_last; + hi =3D div64_u64_rem((u64)ns << 32, 1000000000ULL, &rem); + time_frac =3D (hi << 32) | div64_u64(rem << 32, 1000000000ULL); + + 
clock_status =3D !(ntp_get_status() & STA_UNSYNC) ? + VMCLOCK_STATUS_SYNCHRONIZED : VMCLOCK_STATUS_FREERUNNING; + + /* Prepare le values */ + le_counter_value =3D cpu_to_le64(counter_value); + le_time_sec =3D cpu_to_le64(sec); + le_time_frac =3D cpu_to_le64(time_frac); + le_period_frac =3D cpu_to_le64(cached_period_frac); + period_shift =3D cached_period_shift; + + /* Update page under seqcount */ + WRITE_ONCE(clk->seq_count, cpu_to_le32( + le32_to_cpu(READ_ONCE(clk->seq_count)) + 1)); + smp_wmb(); + + clk->counter_id =3D counter_id; + clk->counter_value =3D le_counter_value; + clk->time_sec =3D le_time_sec; + clk->time_frac_sec =3D le_time_frac; + if (period_changed) { + clk->counter_period_frac_sec =3D le_period_frac; + clk->counter_period_shift =3D period_shift; + } + clk->clock_status =3D clock_status; + + /* Set leap second indicator from NTP time_state */ + switch (ntp_get_time_state()) { + case TIME_INS: + clk->leap_indicator =3D VMCLOCK_LEAP_PRE_POS; + break; + case TIME_DEL: + clk->leap_indicator =3D VMCLOCK_LEAP_PRE_NEG; + break; + case TIME_OOP: + clk->leap_indicator =3D VMCLOCK_LEAP_POS; + break; + case TIME_WAIT: + clk->leap_indicator =3D (ntp_get_status() & STA_DEL) ? 
+ VMCLOCK_LEAP_POST_NEG : VMCLOCK_LEAP_POST_POS; + break; + default: + clk->leap_indicator =3D VMCLOCK_LEAP_NONE; + break; + } + + /* Export as TAI if tai_offset is known, otherwise UTC */ + if (tk->tai_offset) { + clk->time_type =3D VMCLOCK_TIME_TAI; + clk->tai_offset_sec =3D cpu_to_le16((s16)tk->tai_offset); + clk->flags =3D cpu_to_le64(VMCLOCK_FLAG_TAI_OFFSET_VALID | + VMCLOCK_FLAG_TIME_MONOTONIC | + VMCLOCK_FLAG_NOTIFICATION_PRESENT); + } else { + clk->time_type =3D VMCLOCK_TIME_UTC; + clk->tai_offset_sec =3D 0; + clk->flags =3D cpu_to_le64(VMCLOCK_FLAG_TIME_MONOTONIC | + VMCLOCK_FLAG_NOTIFICATION_PRESENT); + } + + smp_wmb(); + WRITE_ONCE(clk->seq_count, cpu_to_le32( + le32_to_cpu(READ_ONCE(clk->seq_count)) + 1)); + + wake_up_interruptible(&vmclock_wait); + return NOTIFY_DONE; +} + +/* File operations */ + +struct vmclock_host_file { + u32 last_seq; +}; + +static int vmclock_host_open(struct inode *inode, struct file *fp) +{ + struct vmclock_host_file *fst; + + fst =3D kzalloc(sizeof(*fst), GFP_KERNEL); + if (!fst) + return -ENOMEM; + + fp->private_data =3D fst; + return 0; +} + +static int vmclock_host_release(struct inode *inode, struct file *fp) +{ + kfree(fp->private_data); + return 0; +} + +static int vmclock_host_mmap(struct file *fp, struct vm_area_struct *vma) +{ + if ((vma->vm_flags & (VM_READ | VM_WRITE)) !=3D VM_READ) + return -EROFS; + + if (vma->vm_end - vma->vm_start !=3D PAGE_SIZE || vma->vm_pgoff) + return -EINVAL; + + return remap_pfn_range(vma, vma->vm_start, + virt_to_phys(vmclock_page) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot); +} + +static ssize_t vmclock_host_read(struct file *fp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct vmclock_host_file *fst =3D fp->private_data; + u32 seq; + + if (*ppos >=3D PAGE_SIZE) + return 0; + if (count > PAGE_SIZE - *ppos) + count =3D PAGE_SIZE - *ppos; + + do { + seq =3D le32_to_cpu(READ_ONCE(vmclock_page->seq_count)); + if (seq & 1) { + cpu_relax(); + continue; + } + smp_rmb(); + if 
(copy_to_user(buf, (char *)vmclock_page + *ppos, count)) + return -EFAULT; + smp_rmb(); + } while (le32_to_cpu(READ_ONCE(vmclock_page->seq_count)) !=3D seq); + + fst->last_seq =3D seq; + *ppos +=3D count; + return count; +} + +static __poll_t vmclock_host_poll(struct file *fp, poll_table *wait) +{ + struct vmclock_host_file *fst =3D fp->private_data; + u32 seq; + + poll_wait(fp, &vmclock_wait, wait); + + seq =3D le32_to_cpu(READ_ONCE(vmclock_page->seq_count)); + if (fst->last_seq !=3D seq) + return EPOLLIN | EPOLLRDNORM; + + return 0; +} + +static const struct file_operations vmclock_host_fops =3D { + .owner =3D THIS_MODULE, + .open =3D vmclock_host_open, + .release =3D vmclock_host_release, + .mmap =3D vmclock_host_mmap, + .read =3D vmclock_host_read, + .poll =3D vmclock_host_poll, +}; + +static struct miscdevice vmclock_host_miscdev =3D { + .minor =3D MISC_DYNAMIC_MINOR, + .name =3D "vmclock_host", + .fops =3D &vmclock_host_fops, +}; + +static struct notifier_block vmclock_host_nb =3D { + .notifier_call =3D vmclock_host_notify, +}; + +static int __init vmclock_host_init(void) +{ + int ret; + + vmclock_page =3D (struct vmclock_abi *)get_zeroed_page(GFP_KERNEL); + if (!vmclock_page) + return -ENOMEM; + + /* Set constant fields */ + vmclock_page->magic =3D cpu_to_le32(VMCLOCK_MAGIC); + vmclock_page->size =3D cpu_to_le32(PAGE_SIZE); + vmclock_page->version =3D cpu_to_le16(1); + + ret =3D misc_register(&vmclock_host_miscdev); + if (ret) { + free_page((unsigned long)vmclock_page); + vmclock_page =3D NULL; + return ret; + } + + pvclock_gtod_register_notifier(&vmclock_host_nb); + pr_info("vmclock_host: registered /dev/vmclock_host\n"); + return 0; +} + +static void __exit vmclock_host_exit(void) +{ + pvclock_gtod_unregister_notifier(&vmclock_host_nb); + misc_deregister(&vmclock_host_miscdev); + free_page((unsigned long)vmclock_page); + vmclock_page =3D NULL; +} + +module_init(vmclock_host_init); +module_exit(vmclock_host_exit); + +MODULE_LICENSE("GPL"); 
+MODULE_AUTHOR("David Woodhouse "); +MODULE_DESCRIPTION("VMClock host time provider"); diff --git a/tools/testing/selftests/timers/vmclock_host_test.c b/tools/tes= ting/selftests/timers/vmclock_host_test.c new file mode 100644 index 000000000000..c83cc7e6d404 --- /dev/null +++ b/tools/testing/selftests/timers/vmclock_host_test.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test /dev/vmclock_host by comparing its time against CLOCK_TAI. + * + * Maps the vmclock page, reads time from it using the ABI formula, + * and compares with clock_gettime(CLOCK_TAI) using ABA timestamps + * to bound the uncertainty. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef __x86_64__ +static inline uint64_t read_counter(void) +{ + unsigned int lo, hi; + asm volatile("rdtsc" : "=3Da"(lo), "=3Dd"(hi)); + return ((uint64_t)hi << 32) | lo; +} +#elif defined(__aarch64__) +static inline uint64_t read_counter(void) +{ + uint64_t val; + asm volatile("mrs %0, cntvct_el0" : "=3Dr"(val)); + return val; +} +#else +#error "Unsupported architecture" +#endif + +/* + * Compute time from vmclock: T =3D time_sec + time_frac_sec/2^64 + + * (counter_now - counter_value) * counter_period_frac_sec >> (64 + shif= t) + * + * Returns nanoseconds since epoch. + */ +static int64_t vmclock_read_ns(const volatile struct vmclock_abi *clk, + uint64_t counter_now) +{ + uint64_t delta =3D counter_now - clk->counter_value; + uint64_t period =3D clk->counter_period_frac_sec; + uint8_t shift =3D clk->counter_period_shift; + __uint128_t ns128; + + /* delta * period gives seconds in 0.(64+shift) fixed point */ + ns128 =3D (__uint128_t)delta * period; + ns128 >>=3D shift; + /* Now ns128 is seconds in 0.64 fixed point. 
Add time_frac_sec */ + ns128 +=3D clk->time_frac_sec; + /* Top 64 bits are whole seconds of fractional part =E2=80=94 but we + * need to add time_sec for the full result */ + uint64_t frac_sec =3D (uint64_t)(ns128 >> 64); + uint64_t sub_sec_ns =3D (uint64_t)(((ns128 & 0xFFFFFFFFFFFFFFFFULL) * + 1000000000ULL) >> 64); + + return (int64_t)(clk->time_sec + frac_sec) * 1000000000LL + sub_sec_ns; +} + +static int64_t clock_tai_ns(void) +{ + struct timespec ts; + clock_gettime(CLOCK_TAI, &ts); + return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec; +} + +int main(void) +{ + int fd, ret =3D 0; + volatile struct vmclock_abi *clk; + int i, failures =3D 0; + + fd =3D open("/dev/vmclock_host", O_RDONLY); + if (fd < 0) { + if (errno =3D=3D ENOENT) { + printf("SKIP: /dev/vmclock_host not available\n"); + return 4; + } + perror("open /dev/vmclock_host"); + return 1; + } + + clk =3D mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0); + if (clk =3D=3D MAP_FAILED) { + perror("mmap"); + close(fd); + return 1; + } + + if (clk->magic !=3D VMCLOCK_MAGIC) { + fprintf(stderr, "Bad magic: 0x%x\n", clk->magic); + ret =3D 1; + goto out; + } + + if (clk->counter_id =3D=3D VMCLOCK_COUNTER_INVALID) { + printf("SKIP: counter_id is INVALID (clocksource not TSC?)\n"); + ret =3D 4; + goto out; + } + + printf("vmclock_host: version=3D%u counter_id=3D%u time_type=3D%u status= =3D%u\n", + clk->version, clk->counter_id, clk->time_type, clk->clock_status); + printf(" tai_offset=3D%d\n", (int16_t)clk->tai_offset_sec); + printf(" counter_period_frac_sec=3D0x%" PRIx64 " shift=3D%u\n", + (uint64_t)clk->counter_period_frac_sec, clk->counter_period_shift); + + /* ABA comparison: read CLOCK_TAI, vmclock, CLOCK_TAI */ + printf("\nABA comparison (vmclock vs CLOCK_TAI):\n"); + for (i =3D 0; i < 10; i++) { + uint32_t seq; + int64_t tai_before, tai_after, vmclock_ns; + int64_t delta, window; + + /* Read with seqcount retry */ + do { + seq =3D clk->seq_count; + if (seq & 1) { + __asm__ volatile("pause" ::: "memory"); 
+ continue; + } + __asm__ volatile("" ::: "memory"); + + tai_before =3D clock_tai_ns(); + uint64_t ctr =3D read_counter(); + tai_after =3D clock_tai_ns(); + + __asm__ volatile("" ::: "memory"); + if (clk->seq_count !=3D seq) + continue; + + vmclock_ns =3D vmclock_read_ns(clk, ctr); + break; + } while (1); + + window =3D tai_after - tai_before; + /* vmclock should be between tai_before and tai_after */ + delta =3D vmclock_ns - tai_before; + + printf(" [%d] vmclock-tai_before=3D%+" PRId64 "ns window=3D%" + PRId64 "ns", i, delta, window); + + if (delta < -2000 || delta > window + 2000) { + printf(" FAIL (out of range)\n"); + failures++; + } else { + printf(" OK\n"); + } + + usleep(100000); /* 100ms between samples */ + } + + if (failures) { + printf("\nFAIL: %d/%d samples out of range\n", failures, 10); + ret =3D 1; + } else { + printf("\nPASS: all samples within ABA window\n"); + } + +out: + munmap((void *)clk, 4096); + close(fd); + return ret; +} --=20 2.54.0