arch/riscv/kernel/cacheinfo.c | 5 --- drivers/base/arch_topology.c | 12 +++++- drivers/base/cacheinfo.c | 71 ++++++++++++++++++++++++++--------- include/linux/cacheinfo.h | 1 + 4 files changed, 65 insertions(+), 24 deletions(-)
From: Pierre Gondois <pierre.gondois@arm.com>
commit 5944ce092b97caed5d86d961e963b883b5c44ee2 upstream.
commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection
in the CPU hotplug path")
adds a call to detect_cache_attributes() to populate the cacheinfo
before updating the siblings mask. detect_cache_attributes() allocates
memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT
kernels, on secondary CPUs, this triggers a:
'BUG: sleeping function called from invalid context' [1]
as the code is executed with preemption and interrupts disabled.
The primary CPU was previously storing the cache information using
the now removed (struct cpu_topology).llc_id:
commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from
the CPU topology")
allocate_cache_info() tries to build the cacheinfo from the primary
CPU prior to the secondary CPUs booting, if the DT/ACPI description
contains cache information.
If allocate_cache_info() fails, then fall back to the current state
for the cacheinfo allocation. [1] will be triggered in such a case.
When unplugging a CPU, the cacheinfo memory cannot be freed. If it
was, then the memory would be allocated early by the re-plugged
CPU and would trigger [1].
Note that populate_cache_leaves() might be called multiple times
due to populate_leaves being moved up. This is required since
detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu)
being allocated but not populated.
[1]:
| BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111
| preempt_count: 1, expected: 0
| RCU nest depth: 1, expected: 1
| 3 locks held by swapper/111/0:
| #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8
| #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0
| #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80
| irq event stamp: 0
| hardirqs last enabled at (0): 0x0
| hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last enabled at (0): copy_process+0x5dc/0x1ab8
| softirqs last disabled at (0): 0x0
| Preemption disabled at:
| migrate_enable+0x30/0x130
| CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...]
| Call trace:
| __kmalloc+0xbc/0x1e8
| detect_cache_attributes+0x2d4/0x5f0
| update_siblings_masks+0x30/0x368
| store_cpu_topology+0x78/0xb8
| secondary_start_kernel+0xd0/0x198
| __secondary_switched+0xb0/0xb4
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Cc: <stable@vger.kernel.org> # 6.1.x: c3719bd:cacheinfo: Use RISC-V's init_cache_level() as generic OF implementation
Cc: <stable@vger.kernel.org> # 6.1.x: 8844c3d:cacheinfo: Return error code in init_of_cache_level()
Cc: <stable@vger.kernel.org> # 6.1.x: de0df44:cacheinfo: Check 'cache-unified' property to count cache leaves
Cc: <stable@vger.kernel.org> # 6.1.x: fa4d566:ACPI: PPTT: Remove acpi_find_cache_levels()
Cc: <stable@vger.kernel.org> # 6.1.x: bd50036:ACPI: PPTT: Update acpi_find_last_cache_level() to acpi_get_cache_info()
Cc: <stable@vger.kernel.org> # 6.1.x
Signed-off-by: Wen Yang <wen.yang@linux.dev>
---
arch/riscv/kernel/cacheinfo.c | 5 ---
drivers/base/arch_topology.c | 12 +++++-
drivers/base/cacheinfo.c | 71 ++++++++++++++++++++++++++---------
include/linux/cacheinfo.h | 1 +
4 files changed, 65 insertions(+), 24 deletions(-)
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index 440a3df5944c..3a13113f1b29 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -113,11 +113,6 @@ static void fill_cacheinfo(struct cacheinfo **this_leaf,
}
}
-int init_cache_level(unsigned int cpu)
-{
- return init_of_cache_level(cpu);
-}
-
int populate_cache_leaves(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index e7d6e6657ffa..b1c1dd38ab01 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -736,7 +736,7 @@ void update_siblings_masks(unsigned int cpuid)
ret = detect_cache_attributes(cpuid);
if (ret && ret != -ENOENT)
- pr_info("Early cacheinfo failed, ret = %d\n", ret);
+ pr_info("Early cacheinfo allocation failed, ret = %d\n", ret);
/* update core and thread sibling masks */
for_each_online_cpu(cpu) {
@@ -825,7 +825,7 @@ __weak int __init parse_acpi_topology(void)
#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
- int ret;
+ int cpu, ret;
reset_cpu_topology();
ret = parse_acpi_topology();
@@ -840,6 +840,14 @@ void __init init_cpu_topology(void)
reset_cpu_topology();
return;
}
+
+ for_each_possible_cpu(cpu) {
+ ret = fetch_cache_info(cpu);
+ if (ret) {
+ pr_err("Early cacheinfo failed, ret = %d\n", ret);
+ break;
+ }
+ }
}
void store_cpu_topology(unsigned int cpuid)
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index ab99b0f0d010..cd943d06d074 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -412,10 +412,6 @@ static void free_cache_attributes(unsigned int cpu)
return;
cache_shared_cpu_map_remove(cpu);
-
- kfree(per_cpu_cacheinfo(cpu));
- per_cpu_cacheinfo(cpu) = NULL;
- cache_leaves(cpu) = 0;
}
int __weak init_cache_level(unsigned int cpu)
@@ -428,29 +424,71 @@ int __weak populate_cache_leaves(unsigned int cpu)
return -ENOENT;
}
+static inline
+int allocate_cache_info(int cpu)
+{
+ per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
+ sizeof(struct cacheinfo), GFP_ATOMIC);
+ if (!per_cpu_cacheinfo(cpu)) {
+ cache_leaves(cpu) = 0;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+int fetch_cache_info(unsigned int cpu)
+{
+ struct cpu_cacheinfo *this_cpu_ci;
+ unsigned int levels, split_levels;
+ int ret;
+
+ if (acpi_disabled) {
+ ret = init_of_cache_level(cpu);
+ if (ret < 0)
+ return ret;
+ } else {
+ ret = acpi_get_cache_info(cpu, &levels, &split_levels);
+ if (ret < 0)
+ return ret;
+
+ this_cpu_ci = get_cpu_cacheinfo(cpu);
+ this_cpu_ci->num_levels = levels;
+ /*
+ * This assumes that:
+ * - there cannot be any split caches (data/instruction)
+ * above a unified cache
+ * - data/instruction caches come by pair
+ */
+ this_cpu_ci->num_leaves = levels + split_levels;
+ }
+ if (!cache_leaves(cpu))
+ return -ENOENT;
+
+ return allocate_cache_info(cpu);
+}
+
int detect_cache_attributes(unsigned int cpu)
{
int ret;
- /* Since early detection of the cacheinfo is allowed via this
- * function and this also gets called as CPU hotplug callbacks via
- * cacheinfo_cpu_online, the initialisation can be skipped and only
- * CPU maps can be updated as the CPU online status would be update
- * if called via cacheinfo_cpu_online path.
+ /* Since early initialization/allocation of the cacheinfo is allowed
+ * via fetch_cache_info() and this also gets called as CPU hotplug
+ * callbacks via cacheinfo_cpu_online, the init/alloc can be skipped
+ * as it will happen only once (the cacheinfo memory is never freed).
+ * Just populate the cacheinfo.
*/
if (per_cpu_cacheinfo(cpu))
- goto update_cpu_map;
+ goto populate_leaves;
if (init_cache_level(cpu) || !cache_leaves(cpu))
return -ENOENT;
- per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
- sizeof(struct cacheinfo), GFP_ATOMIC);
- if (per_cpu_cacheinfo(cpu) == NULL) {
- cache_leaves(cpu) = 0;
- return -ENOMEM;
- }
+ ret = allocate_cache_info(cpu);
+ if (ret)
+ return ret;
+populate_leaves:
/*
* populate_cache_leaves() may completely setup the cache leaves and
* shared_cpu_map or it may leave it partially setup.
@@ -459,7 +497,6 @@ int detect_cache_attributes(unsigned int cpu)
if (ret)
goto free_ci;
-update_cpu_map:
/*
* For systems using DT for cache hierarchy, fw_token
* and shared_cpu_map will be set up here only if they are
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 00d8e7f9d1c6..dfef57077cd0 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -85,6 +85,7 @@ int populate_cache_leaves(unsigned int cpu);
int cache_setup_acpi(unsigned int cpu);
bool last_level_cache_is_valid(unsigned int cpu);
bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y);
+int fetch_cache_info(unsigned int cpu);
int detect_cache_attributes(unsigned int cpu);
#ifndef CONFIG_ACPI_PPTT
/*
--
2.25.1
On Sat, Sep 27, 2025 at 01:46:58AM +0800, Wen Yang wrote: > From: Pierre Gondois <pierre.gondois@arm.com> > > commit 5944ce092b97caed5d86d961e963b883b5c44ee2 upstream. > > commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection > in the CPU hotplug path") > adds a call to detect_cache_attributes() to populate the cacheinfo > before updating the siblings mask. detect_cache_attributes() allocates > memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT > kernels, on secondary CPUs, this triggers a: > 'BUG: sleeping function called from invalid context' [1] > as the code is executed with preemption and interrupts disabled. > > The primary CPU was previously storing the cache information using > the now removed (struct cpu_topology).llc_id: > commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from > the CPU topology") > > allocate_cache_info() tries to build the cacheinfo from the primary > CPU prior secondary CPUs boot, if the DT/ACPI description > contains cache information. > If allocate_cache_info() fails, then fallback to the current state > for the cacheinfo allocation. [1] will be triggered in such case. > > When unplugging a CPU, the cacheinfo memory cannot be freed. If it > was, then the memory would be allocated early by the re-plugged > CPU and would trigger [1]. > > Note that populate_cache_leaves() might be called multiple times > due to populate_leaves being moved up. This is required since > detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu) > being allocated but not populated. 
> > [1]: > | BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 > | in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111 > | preempt_count: 1, expected: 0 > | RCU nest depth: 1, expected: 1 > | 3 locks held by swapper/111/0: > | #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8 > | #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0 > | #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80 > | irq event stamp: 0 > | hardirqs last enabled at (0): 0x0 > | hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8 > | softirqs last enabled at (0): copy_process+0x5dc/0x1ab8 > | softirqs last disabled at (0): 0x0 > | Preemption disabled at: > | migrate_enable+0x30/0x130 > | CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...] > | Call trace: > | __kmalloc+0xbc/0x1e8 > | detect_cache_attributes+0x2d4/0x5f0 > | update_siblings_masks+0x30/0x368 > | store_cpu_topology+0x78/0xb8 > | secondary_start_kernel+0xd0/0x198 > | __secondary_switched+0xb0/0xb4 > > Signed-off-by: Pierre Gondois <pierre.gondois@arm.com> > Reviewed-by: Sudeep Holla <sudeep.holla@arm.com> > Acked-by: Palmer Dabbelt <palmer@rivosinc.com> > Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com > Signed-off-by: Sudeep Holla <sudeep.holla@arm.com> > Cc: <stable@vger.kernel.org> # 6.1.x: c3719bd:cacheinfo: Use RISC-V's init_cache_level() as generic OF implementation > Cc: <stable@vger.kernel.org> # 6.1.x: 8844c3d:cacheinfo: Return error code in init_of_cache_level( > Cc: <stable@vger.kernel.org> # 6.1.x: de0df44:cacheinfo: Check 'cache-unified' property to count cache leaves > Cc: <stable@vger.kernel.org> # 6.1.x: fa4d566:ACPI: PPTT: Remove acpi_find_cache_levels() > Cc: <stable@vger.kernel.org> # 6.1.x: bd50036:ACPI: PPTT: Update acpi_find_last_cache_level() to acpi_get_cache_info( > Cc: <stable@vger.kernel.org> # 6.1.x I do not understand, why do you want all of these applied 
as well? Can you just send the full series of commits? > Signed-off-by: Wen Yang <wen.yang@linux.dev> Also, you have changed this commit a lot from the original one, please document what you did here. Also, why not just use 6.6.y instead? What is forcing you to use 6.1.y for this platform? What caused this issue to just show up now? thanks, greg k-h
On 9/29/25 21:21, Greg Kroah-Hartman wrote: > On Sat, Sep 27, 2025 at 01:46:58AM +0800, Wen Yang wrote: >> From: Pierre Gondois <pierre.gondois@arm.com> >> >> commit 5944ce092b97caed5d86d961e963b883b5c44ee2 upstream. >> >> adds a call to detect_cache_attributes() to populate the cacheinfo >> before updating the siblings mask. detect_cache_attributes() allocates >> memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT >> kernels, on secondary CPUs, this triggers a: >> 'BUG: sleeping function called from invalid context' [1] >> as the code is executed with preemption and interrupts disabled. >> >> The primary CPU was previously storing the cache information using >> the now removed (struct cpu_topology).llc_id: >> commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from >> the CPU topology") >> >> allocate_cache_info() tries to build the cacheinfo from the primary >> CPU prior secondary CPUs boot, if the DT/ACPI description >> contains cache information. >> If allocate_cache_info() fails, then fallback to the current state >> for the cacheinfo allocation. [1] will be triggered in such case. >> >> When unplugging a CPU, the cacheinfo memory cannot be freed. If it >> was, then the memory would be allocated early by the re-plugged >> CPU and would trigger [1]. >> >> Note that populate_cache_leaves() might be called multiple times >> due to populate_leaves being moved up. This is required since >> detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu) >> being allocated but not populated. 
>> >> [1]: >> | BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 >> | in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111 >> | preempt_count: 1, expected: 0 >> | RCU nest depth: 1, expected: 1 >> | 3 locks held by swapper/111/0: >> | #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8 >> | #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0 >> | #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80 >> | irq event stamp: 0 >> | hardirqs last enabled at (0): 0x0 >> | hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8 >> | softirqs last enabled at (0): copy_process+0x5dc/0x1ab8 >> | softirqs last disabled at (0): 0x0 >> | Preemption disabled at: >> | migrate_enable+0x30/0x130 >> | CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...] >> | Call trace: >> | __kmalloc+0xbc/0x1e8 >> | detect_cache_attributes+0x2d4/0x5f0 >> | update_siblings_masks+0x30/0x368 >> | store_cpu_topology+0x78/0xb8 >> | secondary_start_kernel+0xd0/0x198 >> | __secondary_switched+0xb0/0xb4 >> >> Signed-off-by: Pierre Gondois <pierre.gondois@arm.com> >> Reviewed-by: Sudeep Holla <sudeep.holla@arm.com> >> Acked-by: Palmer Dabbelt <palmer@rivosinc.com> >> Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com >> Signed-off-by: Sudeep Holla <sudeep.holla@arm.com> >> Cc: <stable@vger.kernel.org> # 6.1.x: c3719bd:cacheinfo: Use RISC-V's init_cache_level() as generic OF implementation >> Cc: <stable@vger.kernel.org> # 6.1.x: 8844c3d:cacheinfo: Return error code in init_of_cache_level( >> Cc: <stable@vger.kernel.org> # 6.1.x: de0df44:cacheinfo: Check 'cache-unified' property to count cache leaves >> Cc: <stable@vger.kernel.org> # 6.1.x: fa4d566:ACPI: PPTT: Remove acpi_find_cache_levels() >> Cc: <stable@vger.kernel.org> # 6.1.x: bd50036:ACPI: PPTT: Update acpi_find_last_cache_level() to acpi_get_cache_info( >> Cc: <stable@vger.kernel.org> # 6.1.x > > I do not 
understand, why do you want all of these applied as well? Can > you just send the full series of commits? > Thanks for your comments, here is the original series: https://lore.kernel.org/all/167404285593.885445.6219705651301997538.b4-ty@arm.com/ commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection in the CPU hotplug path") introduced a bug, and this series fixed it. >> Signed-off-by: Wen Yang <wen.yang@linux.dev> > > Also, you have changed this commit a lot from the original one, please > document what you did here. > Thanks for the reminder. We just hope to cherry-pick them onto the 6.1 stable branch, without modifying the original commit. Also checked again, as follows: $ git cherry-pick c3719bd $ git cherry-pick 8844c3d $ git cherry-pick de0df44 $ git cherry-pick fa4d566 $ git cherry-pick bd50036 $ git cherry-pick 5944ce0 $ git format-patch HEAD -1 $ diff 0001-arch_topology-Build-cacheinfo-from-primary-CPU.patch 20250927_wen_yang_arch_topology_build_cacheinfo_from_primary_cpu.mbx Consistent with the original commit. > Also, why not just use 6.6.y instead? What is forcing you to use 6.1.y > for this platform? What caused this issue to just show up now? > Thank you for your suggestion. But our production environment has been using 6.1.y-rt for quite some time now, so we can only gradually migrate to 6.6.y. Perhaps some recently added loads related to power on/off have made it easier for this bug to be exposed. Also hope that the upstream 6.1.y branch could fix it. -- Best wishes, Wen
On Tue, Sep 30, 2025 at 01:57:40AM +0800, Wen Yang wrote: > > > On 9/29/25 21:21, Greg Kroah-Hartman wrote: > > On Sat, Sep 27, 2025 at 01:46:58AM +0800, Wen Yang wrote: > > > From: Pierre Gondois <pierre.gondois@arm.com> > > > > > > commit 5944ce092b97caed5d86d961e963b883b5c44ee2 upstream. > > > > > > > adds a call to detect_cache_attributes() to populate the cacheinfo > > > before updating the siblings mask. detect_cache_attributes() allocates > > > memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT > > > kernels, on secondary CPUs, this triggers a: > > > 'BUG: sleeping function called from invalid context' [1] > > > as the code is executed with preemption and interrupts disabled. > > > > > > The primary CPU was previously storing the cache information using > > > the now removed (struct cpu_topology).llc_id: > > > commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from > > > the CPU topology") > > > > > > allocate_cache_info() tries to build the cacheinfo from the primary > > > CPU prior secondary CPUs boot, if the DT/ACPI description > > > contains cache information. > > > If allocate_cache_info() fails, then fallback to the current state > > > for the cacheinfo allocation. [1] will be triggered in such case. > > > > > > When unplugging a CPU, the cacheinfo memory cannot be freed. If it > > > was, then the memory would be allocated early by the re-plugged > > > CPU and would trigger [1]. > > > > > > Note that populate_cache_leaves() might be called multiple times > > > due to populate_leaves being moved up. This is required since > > > detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu) > > > being allocated but not populated. 
> > > > > > [1]: > > > | BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 > > > | in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111 > > > | preempt_count: 1, expected: 0 > > > | RCU nest depth: 1, expected: 1 > > > | 3 locks held by swapper/111/0: > > > | #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8 > > > | #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0 > > > | #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80 > > > | irq event stamp: 0 > > > | hardirqs last enabled at (0): 0x0 > > > | hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8 > > > | softirqs last enabled at (0): copy_process+0x5dc/0x1ab8 > > > | softirqs last disabled at (0): 0x0 > > > | Preemption disabled at: > > > | migrate_enable+0x30/0x130 > > > | CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...] > > > | Call trace: > > > | __kmalloc+0xbc/0x1e8 > > > | detect_cache_attributes+0x2d4/0x5f0 > > > | update_siblings_masks+0x30/0x368 > > > | store_cpu_topology+0x78/0xb8 > > > | secondary_start_kernel+0xd0/0x198 > > > | __secondary_switched+0xb0/0xb4 > > > > > > Signed-off-by: Pierre Gondois <pierre.gondois@arm.com> > > > Reviewed-by: Sudeep Holla <sudeep.holla@arm.com> > > > Acked-by: Palmer Dabbelt <palmer@rivosinc.com> > > > Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com > > > Signed-off-by: Sudeep Holla <sudeep.holla@arm.com> > > > Cc: <stable@vger.kernel.org> # 6.1.x: c3719bd:cacheinfo: Use RISC-V's init_cache_level() as generic OF implementation > > > Cc: <stable@vger.kernel.org> # 6.1.x: 8844c3d:cacheinfo: Return error code in init_of_cache_level( > > > Cc: <stable@vger.kernel.org> # 6.1.x: de0df44:cacheinfo: Check 'cache-unified' property to count cache leaves > > > Cc: <stable@vger.kernel.org> # 6.1.x: fa4d566:ACPI: PPTT: Remove acpi_find_cache_levels() > > > Cc: <stable@vger.kernel.org> # 6.1.x: bd50036:ACPI: PPTT: Update 
acpi_find_last_cache_level() to acpi_get_cache_info( > > > Cc: <stable@vger.kernel.org> # 6.1.x > > > > I do not understand, why do you want all of these applied as well? Can > > you just send the full series of commits? > > > Thanks for your comments, here is the original series: > https://lore.kernel.org/all/167404285593.885445.6219705651301997538.b4-ty@arm.com/ > > commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection in the > CPU hotplug path") introduced a bug, and this series fixed it. > > > > Signed-off-by: Wen Yang <wen.yang@linux.dev> > > > > Also, you have changed this commit a lot from the original one, please > > document what you did here. > > > Thanks for the reminder. We just hope to cherry-pick them onto the 6.1 > stable branch, without modifying the original commit. > Also checked again, as follows: > > $ git cherry-pick c3719bd > $ git cherry-pick 8844c3d > $ git cherry-pick de0df44 > $ git cherry-pick fa4d566 > $ git cherry-pick bd50036 > $ git cherry-pick 5944ce0 > > $ git format-patch HEAD -1 > > $ diff 0001-arch_topology-Build-cacheinfo-from-primary-CPU.patch > 20250927_wen_yang_arch_topology_build_cacheinfo_from_primary_cpu.mbx Can you resend these all as a patch series with your signed-off-by on them to show that you have tested them? And again, the commit here did not seem to match up with the original upstream version, but maybe my tools got it wrong. Resend the series and I'll check it again. thanks, greg k-h
On 9/30/25 02:29, Greg Kroah-Hartman wrote: > On Tue, Sep 30, 2025 at 01:57:40AM +0800, Wen Yang wrote: >> >> >> On 9/29/25 21:21, Greg Kroah-Hartman wrote: >>> On Sat, Sep 27, 2025 at 01:46:58AM +0800, Wen Yang wrote: >>>> From: Pierre Gondois <pierre.gondois@arm.com> >>>> >>>> commit 5944ce092b97caed5d86d961e963b883b5c44ee2 upstream. >>>> >> >>>> adds a call to detect_cache_attributes() to populate the cacheinfo >>>> before updating the siblings mask. detect_cache_attributes() allocates >>>> memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT >>>> kernels, on secondary CPUs, this triggers a: >>>> 'BUG: sleeping function called from invalid context' [1] >>>> as the code is executed with preemption and interrupts disabled. >>>> >>>> The primary CPU was previously storing the cache information using >>>> the now removed (struct cpu_topology).llc_id: >>>> commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from >>>> the CPU topology") >>>> >>>> allocate_cache_info() tries to build the cacheinfo from the primary >>>> CPU prior secondary CPUs boot, if the DT/ACPI description >>>> contains cache information. >>>> If allocate_cache_info() fails, then fallback to the current state >>>> for the cacheinfo allocation. [1] will be triggered in such case. >>>> >>>> When unplugging a CPU, the cacheinfo memory cannot be freed. If it >>>> was, then the memory would be allocated early by the re-plugged >>>> CPU and would trigger [1]. >>>> >>>> Note that populate_cache_leaves() might be called multiple times >>>> due to populate_leaves being moved up. This is required since >>>> detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu) >>>> being allocated but not populated. 
>>>> >>>> [1]: >>>> | BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 >>>> | in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111 >>>> | preempt_count: 1, expected: 0 >>>> | RCU nest depth: 1, expected: 1 >>>> | 3 locks held by swapper/111/0: >>>> | #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8 >>>> | #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0 >>>> | #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80 >>>> | irq event stamp: 0 >>>> | hardirqs last enabled at (0): 0x0 >>>> | hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8 >>>> | softirqs last enabled at (0): copy_process+0x5dc/0x1ab8 >>>> | softirqs last disabled at (0): 0x0 >>>> | Preemption disabled at: >>>> | migrate_enable+0x30/0x130 >>>> | CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...] >>>> | Call trace: >>>> | __kmalloc+0xbc/0x1e8 >>>> | detect_cache_attributes+0x2d4/0x5f0 >>>> | update_siblings_masks+0x30/0x368 >>>> | store_cpu_topology+0x78/0xb8 >>>> | secondary_start_kernel+0xd0/0x198 >>>> | __secondary_switched+0xb0/0xb4 >>>> >>>> Signed-off-by: Pierre Gondois <pierre.gondois@arm.com> >>>> Reviewed-by: Sudeep Holla <sudeep.holla@arm.com> >>>> Acked-by: Palmer Dabbelt <palmer@rivosinc.com> >>>> Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com >>>> Signed-off-by: Sudeep Holla <sudeep.holla@arm.com> >>>> Cc: <stable@vger.kernel.org> # 6.1.x: c3719bd:cacheinfo: Use RISC-V's init_cache_level() as generic OF implementation >>>> Cc: <stable@vger.kernel.org> # 6.1.x: 8844c3d:cacheinfo: Return error code in init_of_cache_level( >>>> Cc: <stable@vger.kernel.org> # 6.1.x: de0df44:cacheinfo: Check 'cache-unified' property to count cache leaves >>>> Cc: <stable@vger.kernel.org> # 6.1.x: fa4d566:ACPI: PPTT: Remove acpi_find_cache_levels() >>>> Cc: <stable@vger.kernel.org> # 6.1.x: bd50036:ACPI: PPTT: Update acpi_find_last_cache_level() to 
acpi_get_cache_info( >>>> Cc: <stable@vger.kernel.org> # 6.1.x >>> >>> I do not understand, why do you want all of these applied as well? Can >>> you just send the full series of commits? >>> >> Thanks for your comments, here is the original series: >> https://lore.kernel.org/all/167404285593.885445.6219705651301997538.b4-ty@arm.com/ >> >> commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection in the >> CPU hotplug path") introduced a bug, and this series fixed it. >> >>>> Signed-off-by: Wen Yang <wen.yang@linux.dev> >>> >>> Also, you have changed this commit a lot from the original one, please >>> document what you did here. >>> >> Thanks for the reminder. We just hope to cherry-pick them onto the 6.1 >> stable branch, without modifying the original commit. >> Also checked again, as follows: >> >> $ git cherry-pick c3719bd >> $ git cherry-pick 8844c3d >> $ git cherry-pick de0df44 >> $ git cherry-pick fa4d566 >> $ git cherry-pick bd50036 >> $ git cherry-pick 5944ce0 >> >> $ git format-patch HEAD -1 >> >> $ diff 0001-arch_topology-Build-cacheinfo-from-primary-CPU.patch >> 20250927_wen_yang_arch_topology_build_cacheinfo_from_primary_cpu.mbx > > > Can you resend these all as a patch series with your signed-off-by on > them to show that you have tested them? > > And again, the commit here did not seem to match up with the original > upstream version, but maybe my tools got it wrong. Resend the series > and I'll check it again. > Thanks. We will resend this series soon. -- Best wishes, Wen
© 2016 - 2025 Red Hat, Inc.