fix CPU stuck due to the taprio hrtimer

Yun Lu posted 1 patch 1 year, 7 months ago
net/sched/sch_taprio.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
fix CPU stuck due to the taprio hrtimer
Posted by Yun Lu 1 year, 7 months ago
#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git  master

Signed-off-by: Yun Lu <luyun@kylinos.cn>
---
 net/sched/sch_taprio.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index a0d54b422186..2ff8d34bdbac 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -104,6 +104,7 @@ struct taprio_sched {
 	u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
 	u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
 	u32 txtime_delay;
+	ktime_t offset;
 };
 
 struct __tc_taprio_qopt_offload {
@@ -170,6 +171,19 @@ static ktime_t sched_base_time(const struct sched_gate_list *sched)
 	return ns_to_ktime(sched->base_time);
 }
 
+static ktime_t taprio_get_offset(const struct taprio_sched *q)
+{
+	enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
+	ktime_t time = ktime_get();
+
+	switch (tk_offset) {
+	case TK_OFFS_MAX:
+		return 0;
+	default:
+		return ktime_sub_ns(ktime_mono_to_any(time, tk_offset), time);
+	}
+}
+
 static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
 {
 	/* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
@@ -918,6 +932,7 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 	int num_tc = netdev_get_num_tc(dev);
 	struct sched_entry *entry, *next;
 	struct Qdisc *sch = q->root;
+	ktime_t now_offset = taprio_get_offset(q);
 	ktime_t end_time;
 	int tc;
 
@@ -957,6 +972,14 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 	end_time = ktime_add_ns(entry->end_time, next->interval);
 	end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
 
+	if (q->offset != now_offset) {
+		ktime_t diff = ktime_sub_ns(now_offset, q->offset);
+
+		end_time = ktime_add_ns(end_time, diff);
+		oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time, diff);
+		q->offset = now_offset;
+	}
+
 	for (tc = 0; tc < num_tc; tc++) {
 		if (next->gate_duration[tc] == oper->cycle_time)
 			next->gate_close_time[tc] = KTIME_MAX;
@@ -1210,6 +1233,7 @@ static int taprio_get_start_time(struct Qdisc *sch,
 
 	base = sched_base_time(sched);
 	now = taprio_get_time(q);
+	q->offset = taprio_get_offset(q);
 
 	if (ktime_after(base, now)) {
 		*start = base;
-- 
2.34.1
Re: [syzbot] [kasan?] [mm?] INFO: rcu detected stall in __run_timer_base
Posted by syzbot 1 year, 7 months ago
Hello,

syzbot has tested the proposed patch but the reproducer is still triggering an issue:
INFO: rcu detected stall in sys_mkdirat

rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: { 1-...D } 2672 jiffies s: 2029 root: 0x2/.
rcu: blocking rcu_node structures (internal RCU debug):
Sending NMI from CPU 0 to CPUs 1:
NMI backtrace for cpu 1
CPU: 1 PID: 5458 Comm: syz-executor.0 Not tainted 6.9.0-rc7-syzkaller-gdd5a440a31fa-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
RIP: 0010:lockdep_enabled kernel/locking/lockdep.c:122 [inline]
RIP: 0010:lock_release+0x125/0x9f0 kernel/locking/lockdep.c:5767
Code: 7e 85 c0 0f 85 23 05 00 00 65 48 8b 04 25 c0 d3 03 00 48 89 44 24 18 48 8d 98 d4 0a 00 00 48 89 d8 48 c1 e8 03 42 0f b6 04 38 <84> c0 0f 85 d8 05 00 00 83 3b 00 0f 85 f1 04 00 00 4c 8d b4 24 b0
RSP: 0018:ffffc90000a08a20 EFLAGS: 00000017
RAX: 0000000000000000 RBX: ffff888026abe4d4 RCX: ffffffff81728e30
RDX: 0000000000000000 RSI: ffffffff8c1f89c0 RDI: ffffffff8c1f8980
RBP: ffffc90000a08b50 R08: ffffffff8fa9faef R09: 1ffffffff1f53f5d
R10: dffffc0000000000 R11: fffffbfff1f53f5e R12: 1ffff92000141150
R13: ffffffff84b6f924 R14: ffffc90000a08b80 R15: dffffc0000000000
FS:  00005555763c6480(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fffe1c6dbf8 CR3: 0000000022fe4000 CR4: 0000000000350ef0
Call Trace:
 <NMI>
 </NMI>
 <IRQ>
 __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:149 [inline]
 _raw_spin_unlock_irqrestore+0x79/0x140 kernel/locking/spinlock.c:194
 debug_object_activate+0x3e4/0x510 lib/debugobjects.c:726
 debug_hrtimer_activate kernel/time/hrtimer.c:423 [inline]
 debug_activate kernel/time/hrtimer.c:478 [inline]
 enqueue_hrtimer+0x30/0x3c0 kernel/time/hrtimer.c:1090
 __run_hrtimer kernel/time/hrtimer.c:1709 [inline]
 __hrtimer_run_queues+0x6d5/0xd50 kernel/time/hrtimer.c:1756
 hrtimer_interrupt+0x396/0x990 kernel/time/hrtimer.c:1818
 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1032 [inline]
 __sysvec_apic_timer_interrupt+0x112/0x3f0 arch/x86/kernel/apic/apic.c:1049
 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline]
 sysvec_apic_timer_interrupt+0xa1/0xc0 arch/x86/kernel/apic/apic.c:1043
 </IRQ>
 <TASK>
 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702
RIP: 0010:kmem_cache_free+0x15/0x2c0 mm/slub.c:4339
Code: 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 55 41 57 41 56 41 55 41 54 53 48 83 ec 10 48 89 f3 <65> 48 8b 04 25 28 00 00 00 48 89 44 24 08 e8 a8 02 00 00 48 85 c0
RSP: 0018:ffffc900042972b8 EFLAGS: 00000286
RAX: ffffffff825d0aa6 RBX: ffff88807794e820 RCX: ffff888026abda00
RDX: 0000000000000000 RSI: ffff88807794e820 RDI: ffff8880193dac80
RBP: ffffc900042974b0 R08: ffffffff825d0a81 R09: 1ffffffff1f53f5d
R10: dffffc0000000000 R11: fffffbfff1f53f5e R12: ffff88802f56c000
R13: ffff88807794e8a0 R14: ffffc90004297720 R15: 0000000000000001
 ext4_mb_new_blocks+0x2ab4/0x4bf0 fs/ext4/mballoc.c:6250
 ext4_ext_map_blocks+0x1c74/0x77b0 fs/ext4/extents.c:4317
 ext4_map_blocks+0xa5e/0x1d20 fs/ext4/inode.c:623
 ext4_getblk+0x1fa/0x850 fs/ext4/inode.c:833
 ext4_bread+0x2e/0x180 fs/ext4/inode.c:889
 ext4_append+0x327/0x5c0 fs/ext4/namei.c:83
 ext4_init_new_dir+0x33e/0xa30 fs/ext4/namei.c:2977
 ext4_mkdir+0x4f7/0xcf0 fs/ext4/namei.c:3023
 vfs_mkdir+0x2fb/0x4b0 fs/namei.c:4123
 do_mkdirat+0x264/0x3a0 fs/namei.c:4146
 __do_sys_mkdirat fs/namei.c:4161 [inline]
 __se_sys_mkdirat fs/namei.c:4159 [inline]
 __x64_sys_mkdirat+0x89/0xa0 fs/namei.c:4159
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f0aef87c967
Code: 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 02 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fffe1c6f498 EFLAGS: 00000202 ORIG_RAX: 0000000000000102
RAX: ffffffffffffffda RBX: 00007fffe1c6f520 RCX: 00007f0aef87c967
RDX: 00000000000001ff RSI: 00007fffe1c6f520 RDI: 00000000ffffff9c
RBP: 00007fffe1c6f4fc R08: 0000000000000004 R09: 00007fffe1c6f236
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000032
R13: 000000000001cfb4 R14: 000000000001cfab R15: 0000000000000004
 </TASK>


Tested on:

commit:         dd5a440a Linux 6.9-rc7
git tree:       https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
console output: https://syzkaller.appspot.com/x/log.txt?x=1298931f180000
kernel config:  https://syzkaller.appspot.com/x/.config?x=448b220a4abc599a
dashboard link: https://syzkaller.appspot.com/bug?extid=1acbadd9f48eeeacda29
compiler:       Debian clang version 15.0.6, GNU ld (GNU Binutils for Debian) 2.40
patch:          https://syzkaller.appspot.com/x/patch.diff?x=125a2ca8980000