From nobody Sun Apr 12 21:00:57 2026 Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.15]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3D8383BC69E for ; Wed, 1 Apr 2026 21:47:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=198.175.65.15 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1775080037; cv=none; b=esKcM6joqyKvqY4+nQB9JB2qR6GhFSg6T8OX5kPgXXAsbogezB6Rb+wY/hOWuwwzf1p/6+/+cBOlh7JrPWDgYiH8kzb3000n9ia80RGxk+2GiZRKJm0tV2MSAn4ohrDR1HCTDVgCfzvIZVEuorDZ9r9ZyYa4qjmtUggj8yE5R6g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1775080037; c=relaxed/simple; bh=z252zsjK1jb7r+xvopuqKqbzJCAc2JcyZOKONSwy3QA=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=Pr31YVvXtJvL3a4XSmpvxP/ak+n4EYzQtJtXh4vgiaPlzW8Fzn64IX9QW7oUFKmka0+/4Gi85R3N00+17Bchj+S5eOG1Ep5TBmeOJ+rdd/DQVaGYdPOUIRdP9GB5jCIuBUFAQEeubjoFdKu0Zbe3adRA8klPbtQ0xadGSbFQ2Ew= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.intel.com; spf=pass smtp.mailfrom=linux.intel.com; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b=SbeoZiJW; arc=none smtp.client-ip=198.175.65.15 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linux.intel.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linux.intel.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=intel.com header.i=@intel.com header.b="SbeoZiJW" DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1775080036; x=1806616036; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=z252zsjK1jb7r+xvopuqKqbzJCAc2JcyZOKONSwy3QA=; 
b=SbeoZiJWXPIENwzLSuk5BXCKICps8sl8x1Hm6wjZh04HcCHtCWbZ+gmV tlCqkpCq7AM1BfWgKxoFoFydnn/JC55JU0501BX1Pn0s8FIzeLJyaN9ru Nf1M5L9z8xxrOgkZVxuQPiwALvu01VB4975PrfDdAVPjYvSQKheKOVYDA i2mif/YiktWjLa23SitNnKqgXyJTNh4+4slQX+ZdQftOVMjoTSQWNyNlZ PLLyEgxtNzn/6UKfm/YVKbSyrUScr+jRafRs7oDC/eGX+yqbsBVvGR6bu 9U2IZ8xWtGznSbDGi36k8rh5j8xUacXTfgbOQWGxGRuPj0SDYuQ9fq6UI w==; X-CSE-ConnectionGUID: y7Vm1cERRGmyXnxmsTJeTg== X-CSE-MsgGUID: xYXUENzqRsqJ1ahvx0Eyeg== X-IronPort-AV: E=McAfee;i="6800,10657,11746"; a="79740099" X-IronPort-AV: E=Sophos;i="6.23,153,1770624000"; d="scan'208";a="79740099" Received: from fmviesa002.fm.intel.com ([10.60.135.142]) by orvoesa107.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 01 Apr 2026 14:47:16 -0700 X-CSE-ConnectionGUID: 5BF5QjVaQxeHPym/Y1yOTw== X-CSE-MsgGUID: b1R6N6cWQ2KWgVKaUdoqJw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.23,153,1770624000"; d="scan'208";a="249842496" Received: from b04f130c83f2.jf.intel.com ([10.165.154.98]) by fmviesa002.fm.intel.com with ESMTP; 01 Apr 2026 14:47:14 -0700 From: Tim Chen To: Peter Zijlstra , Ingo Molnar , K Prateek Nayak , "Gautham R . 
Shenoy" , Vincent Guittot Cc: Chen Yu , Juri Lelli , Dietmar Eggemann , Steven Rostedt , Ben Segall , Mel Gorman , Valentin Schneider , Madadi Vineeth Reddy , Hillf Danton , Shrikanth Hegde , Jianyong Wu , Yangyu Chen , Tingyin Duan , Vern Hao , Vern Hao , Len Brown , Tim Chen , Aubrey Li , Zhao Liu , Chen Yu , Adam Li , Aaron Lu , Tim Chen , Josh Don , Gavin Guo , Qais Yousef , Libo Chen , linux-kernel@vger.kernel.org Subject: [Patch v4 19/22] sched/cache: Allow the user space to turn on and off cache aware scheduling Date: Wed, 1 Apr 2026 14:52:31 -0700 Message-Id: <0aa56f7fc48db2f8f700cd1aa34dedd0ec88351b.1775065312.git.tim.c.chen@linux.intel.com> X-Mailer: git-send-email 2.32.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Chen Yu Provide a debugfs directory, llc_balancing, with a knob named "enabled" under it that lets the user turn cache aware scheduling on and off at runtime. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Chen Yu Signed-off-by: Tim Chen --- Notes: v3->v4: Create the debugfs knobs under the debug/sched/llc_balancing directory. 
(Peter Zijlstra) kernel/sched/debug.c | 48 +++++++++++++++++++++++++++++- kernel/sched/sched.h | 7 ++++- kernel/sched/topology.c | 65 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 2 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index b24f40f05019..3019412d8009 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -209,6 +209,46 @@ static const struct file_operations sched_scaling_fops= =3D { .release =3D single_release, }; =20 +#ifdef CONFIG_SCHED_CACHE +static ssize_t +sched_cache_enable_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + bool val; + int ret; + + ret =3D kstrtobool_from_user(ubuf, cnt, &val); + if (ret) + return ret; + + sysctl_sched_cache_user =3D val; + + sched_cache_active_set_unlocked(); + + return cnt; +} + +static int sched_cache_enable_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", sysctl_sched_cache_user); + return 0; +} + +static int sched_cache_enable_open(struct inode *inode, + struct file *filp) +{ + return single_open(filp, sched_cache_enable_show, NULL); +} + +static const struct file_operations sched_cache_enable_fops =3D { + .open =3D sched_cache_enable_open, + .write =3D sched_cache_enable_write, + .read =3D seq_read, + .llseek =3D seq_lseek, + .release =3D single_release, +}; +#endif + #ifdef CONFIG_PREEMPT_DYNAMIC =20 static ssize_t sched_dynamic_write(struct file *filp, const char __user *u= buf, @@ -592,7 +632,7 @@ static void debugfs_ext_server_init(void) =20 static __init int sched_init_debug(void) { - struct dentry __maybe_unused *numa; + struct dentry __maybe_unused *numa, *llc; =20 debugfs_sched =3D debugfs_create_dir("sched", NULL); =20 @@ -625,6 +665,12 @@ static __init int sched_init_debug(void) debugfs_create_u32("hot_threshold_ms", 0644, numa, &sysctl_numa_balancing= _hot_threshold); #endif /* CONFIG_NUMA_BALANCING */ =20 +#ifdef CONFIG_SCHED_CACHE + llc =3D debugfs_create_dir("llc_balancing", 
debugfs_sched); + debugfs_create_file("enabled", 0644, llc, NULL, + &sched_cache_enable_fops); +#endif + debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops= ); =20 debugfs_fair_server_init(); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 518c798231ac..5561bdcc8bf5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -4036,11 +4036,16 @@ static inline void mm_cid_switch_to(struct task_str= uct *prev, struct task_struct =20 #ifdef CONFIG_SCHED_CACHE DECLARE_STATIC_KEY_FALSE(sched_cache_present); +DECLARE_STATIC_KEY_FALSE(sched_cache_active); +extern int sysctl_sched_cache_user; =20 static inline bool sched_cache_enabled(void) { - return static_branch_unlikely(&sched_cache_present); + return static_branch_unlikely(&sched_cache_active); } + +extern void sched_cache_active_set_unlocked(void); + #endif =20 void sched_domains_free_llc_id(int cpu); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 0b1fc1b0709d..ceb17ef31ef6 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -809,7 +809,16 @@ enum s_alloc { }; =20 #ifdef CONFIG_SCHED_CACHE +/* hardware support for cache aware scheduling */ DEFINE_STATIC_KEY_FALSE(sched_cache_present); +/* + * Indicator of whether cache aware scheduling + * is active, used by the scheduler. 
+ */ +DEFINE_STATIC_KEY_FALSE(sched_cache_active); +/* user wants cache aware scheduling [0 or 1] */ +int sysctl_sched_cache_user =3D 1; + static bool alloc_sd_llc(const struct cpumask *cpu_map, struct s_data *d) { @@ -843,6 +852,60 @@ static bool alloc_sd_llc(const struct cpumask *cpu_map, =20 return false; } + +static void _sched_cache_active_set(bool enable, bool locked) +{ + if (enable) { + if (locked) + static_branch_enable_cpuslocked(&sched_cache_active); + else + static_branch_enable(&sched_cache_active); + } else { + if (locked) + static_branch_disable_cpuslocked(&sched_cache_active); + else + static_branch_disable(&sched_cache_active); + } +} + +/* + * Enable/disable cache aware scheduling according to + * user input and the presence of hardware support. + */ +static void sched_cache_active_set(bool locked) +{ + /* hardware does not support */ + if (!static_branch_likely(&sched_cache_present)) { + _sched_cache_active_set(false, locked); + return; + } + + /* + * user wants it or not ? + * TBD: read before writing the static key. + * It is not in the critical path, leave as-is + * for now. 
+ */ + if (sysctl_sched_cache_user) { + _sched_cache_active_set(true, locked); + if (sched_debug()) + pr_info("%s: enabling cache aware scheduling\n", __func__); + } else { + _sched_cache_active_set(false, locked); + if (sched_debug()) + pr_info("%s: disabling cache aware scheduling\n", __func__); + } +} + +static void sched_cache_active_set_locked(void) +{ + return sched_cache_active_set(true); +} + +void sched_cache_active_set_unlocked(void) +{ + return sched_cache_active_set(false); +} #else static bool alloc_sd_llc(const struct cpumask *cpu_map, struct s_data *d) @@ -2855,6 +2918,8 @@ build_sched_domains(const struct cpumask *cpu_map, st= ruct sched_domain_attr *att static_branch_enable_cpuslocked(&sched_cache_present); else static_branch_disable_cpuslocked(&sched_cache_present); + + sched_cache_active_set_locked(); #endif __free_domain_allocs(&d, alloc_state, cpu_map); =20 --=20 2.32.0