arch/sparc/kernel/irq_64.c | 10 +++----- arch/sparc/kernel/of_device_64.c | 5 +--- arch/sparc/kernel/pci_msi.c | 5 +--- arch/sparc/mm/init_64.c | 2 +- arch/sparc/mm/srmmu.c | 40 ++++++++++---------------------- 5 files changed, 18 insertions(+), 44 deletions(-)
Hi, This series aims at removing on-stack cpumask var usage for sparc arch. Generally it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Dawei Li (5): sparc/srmmu: Remove on-stack cpumask var sparc/irq: Remove on-stack cpumask var sparc/of: Remove on-stack cpumask var sparc/pci_msi: Remove on-stack cpumask var sparc: Remove on-stack cpumask var arch/sparc/kernel/irq_64.c | 10 +++----- arch/sparc/kernel/of_device_64.c | 5 +--- arch/sparc/kernel/pci_msi.c | 5 +--- arch/sparc/mm/init_64.c | 2 +- arch/sparc/mm/srmmu.c | 40 ++++++++++---------------------- 5 files changed, 18 insertions(+), 44 deletions(-) Thanks, Dawei -- 2.27.0
Hi Dawei, On Thu, Apr 18, 2024 at 06:49:44PM +0800, Dawei Li wrote: > Hi, > > This series aims at removing on-stack cpumask var usage for sparc arch. > > Generally it's preferable to avoid placing cpumasks on the stack, as > for large values of NR_CPUS these can consume significant amounts of > stack space and make stack overflows more likely. Took a quick look at the patches, looks good except the one the bot already complained about. A quick grep shows a few more cases where we have an on-stack cpumask in sparc code. kernel/ds.c: cpumask_t mask; kernel/leon_kernel.c: cpumask_t mask; kernel/leon_smp.c:static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, kernel/sun4d_smp.c:static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, Do you plan to look at the other on-stack users too? It would be nice to see them all gone in one patch-set. Sam
Hi Sam, Thanks for the review. On Fri, Apr 19, 2024 at 07:13:50AM +0200, Sam Ravnborg wrote: > Hi Dawei, > > On Thu, Apr 18, 2024 at 06:49:44PM +0800, Dawei Li wrote: > > Hi, > > > > This series aims at removing on-stack cpumask var usage for sparc arch. > > > > Generally it's preferable to avoid placing cpumasks on the stack, as > > for large values of NR_CPUS these can consume significant amounts of > > stack space and make stack overflows more likely. > > Took a quick look at the patches, looks good except the one the bot > already complained about. I will fix this build warning in the respin. > A quick grep shows a few more cases where we have an on-stack cpumask > in sparc code. > > kernel/ds.c: cpumask_t mask; This case is kind of tricky because: - dr_cpu_data() returns void, so alloc_cpumask_var() is a no-go. - No idea of the calling context of dr_cpu_data(). IIUC, dr_cpu_data() ->dr_cpu_configure() ->kzalloc(resp_len, GFP_KERNEL) So I guess it's in process context? If the assumption above is OK, a simple but _ugly_ solution could be: diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index ffdc15588ac2..c9e4ebdccf49 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -634,7 +634,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); u32 *cpu_list = (u32 *) (tag + 1); u64 req_num = tag->req_num; - cpumask_t mask; + static DEFINE_MUTEX(mask_lock); + static cpumask_t mask; unsigned int i; int err; @@ -651,6 +652,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, purge_dups(cpu_list, tag->num_records); + mutex_lock(&mask_lock); + cpumask_clear(&mask); for (i = 0; i < tag->num_records; i++) { if (cpu_list[i] == CPU_SENTINEL) @@ -665,6 +668,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, else err = dr_cpu_unconfigure(dp, cp, req_num, &mask); + mutex_unlock(&mask_lock); + if (err) 
dr_cpu_send_error(dp, cp, data); } How does it sound to you? > kernel/leon_kernel.c: cpumask_t mask; It's in irqchip::irq_set_affinity(), which is in atomic context (raw spinlock(s) held), so dynamic allocation is not a good idea. My proposal (*untested*) is somewhat complicated because it introduces a new helper. diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 4c61da491fee..6eced7acb8bc 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -104,15 +104,25 @@ unsigned long leon_get_irqmask(unsigned int irq) } #ifdef CONFIG_SMP + +static bool cpumask_include(const struct cpumask *srcp1, const struct cpumask *srcp2) +{ + unsigned int cpu; + + for_each_cpu(cpu, srcp2) { + if (!cpumask_test_cpu(cpu, srcp1)) + return false; + } + + return true; +} + static int irq_choose_cpu(const struct cpumask *affinity) { - cpumask_t mask; + unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask); - cpumask_and(&mask, cpu_online_mask, affinity); - if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask)) - return boot_cpu_id; - else - return cpumask_first(&mask); + return cpumask_include(affinity, cpu_online_mask) || cpu >= nr_cpu_ids ? + boot_cpu_id : cpu; } #else #define irq_choose_cpu(affinity) boot_cpu_id Is it OK? [cc Yury for bitmap API] > kernel/leon_smp.c:static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > kernel/sun4d_smp.c:static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, Actually I am aware of the existence of (at least some of) them, but so far I have not found a _proper_ way of dealing with them (especially for the case of ds.c). Please let me dig into it. Thanks, Dawei > > Do you plan to look at the other on-stack users too? > It would be nice to see them all gone in one patch-set. > > Sam >
On Fri, Apr 19, 2024 at 05:26:34PM +0800, Dawei Li wrote: > Hi Sam, > > Thanks for the review. > > On Fri, Apr 19, 2024 at 07:13:50AM +0200, Sam Ravnborg wrote: > > Hi Dawei, > > > > On Thu, Apr 18, 2024 at 06:49:44PM +0800, Dawei Li wrote: > > > Hi, > > > > > > This series aims at removing on-stack cpumask var usage for sparc arch. > > > > > > Generally it's preferable to avoid placing cpumasks on the stack, as > > > for large values of NR_CPUS these can consume significant amounts of > > > stack space and make stack overflows more likely. > > > > Took a quick look at the patches, looks good except the one the bot > > already complained about. > > I will fix this building warning in respinning. > > > A quick grep shows a few more cases where we have an on-stack cpumask > > in sparc code. > > > > kernel/ds.c: cpumask_t mask; > > About this case, it's kinda tricky for: > - dr_cpu_data() returns void, so alloc_cpumask_var() is no go. > > - No idea of the calling context of dr_cpu_data(). IIUC, > dr_cpu_data() > ->dr_cpu_configure() > ->kzalloc(resp_len, GFP_KERNEL) > So I guess it's in process context? 
> If consumption above is OK, a simple but _ugly_ solution could be: > > diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c > index ffdc15588ac2..c9e4ebdccf49 100644 > --- a/arch/sparc/kernel/ds.c > +++ b/arch/sparc/kernel/ds.c > @@ -634,7 +634,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, > struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); > u32 *cpu_list = (u32 *) (tag + 1); > u64 req_num = tag->req_num; > - cpumask_t mask; > + static DEFINE_MUTEX(mask_lock); > + static cpumask_t mask; > unsigned int i; > int err; > > @@ -651,6 +652,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, > > purge_dups(cpu_list, tag->num_records); > > + mutex_lock(&mask_lock); > + > cpumask_clear(&mask); > for (i = 0; i < tag->num_records; i++) { > if (cpu_list[i] == CPU_SENTINEL) > @@ -665,6 +668,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, > else > err = dr_cpu_unconfigure(dp, cp, req_num, &mask); > > + mutex_unlock(&mask_lock); > + > if (err) > dr_cpu_send_error(dp, cp, data); > } > > How does it sound to you? > > > kernel/leon_kernel.c: cpumask_t mask; > > It's in irqchip::irq_set_affinity(), which is in atomic context(raw spinlock(s) held), > so dynamic allocation is not a good idea. > > My proposal(*untested*) is somewhat complicated for it introduces a new helper. > > diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c > index 4c61da491fee..6eced7acb8bc 100644 > --- a/arch/sparc/kernel/leon_kernel.c > +++ b/arch/sparc/kernel/leon_kernel.c > @@ -104,15 +104,25 @@ unsigned long leon_get_irqmask(unsigned int irq) > } > > #ifdef CONFIG_SMP > + > +static bool cpumask_include(const struct cpumask *srcp1, const struct cpumask *srcp2) Don't steal other subsystems' prefixes. 
> +{ > + unsigned int cpu; > + > + for_each_cpu(cpu, srcp2) { > + if (!cpumask_test_cpu(cpu, srcp1)) > + return false; > + } > + > + return true; > +} We've got cpumask_subset() for this. > static int irq_choose_cpu(const struct cpumask *affinity) > { > - cpumask_t mask; > + unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask); > > - cpumask_and(&mask, cpu_online_mask, affinity); > - if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask)) > - return boot_cpu_id; > - else > - return cpumask_first(&mask); > + return cpumask_include(affinity, cpu_online_mask) || cpu >= nr_cpu_ids ? > + boot_cpu_id : cpu; > } > #else > #define irq_choose_cpu(affinity) boot_cpu_id > > Is it OK? > > [cc Yury for bitmap API] > > > kernel/leon_smp.c:static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > kernel/sun4d_smp.c:static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > Actually I am awared of existence of (at least some of) them, but so far I > have not found a _proper_ way of dealing with them(especially for case of > ds.c). > > Please lemme dig into it. > > Thanks, > > Dawei > > > > > Do you plan to look at the other on-stack users too? > > It would be nice to see them all gone in one patch-set. > > > > Sam > >
Hi Dawei, > About this case, it's kinda tricky for: > - dr_cpu_data() returns void, so alloc_cpumask_var() is no go. > > - No idea of the calling context of dr_cpu_data(). IIUC, > dr_cpu_data() > ->dr_cpu_configure() > ->kzalloc(resp_len, GFP_KERNEL) > So I guess it's in process context? > If consumption above is OK, a simple but _ugly_ solution could be: > > diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c > index ffdc15588ac2..c9e4ebdccf49 100644 > --- a/arch/sparc/kernel/ds.c > +++ b/arch/sparc/kernel/ds.c > @@ -634,7 +634,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, > struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); > u32 *cpu_list = (u32 *) (tag + 1); > u64 req_num = tag->req_num; > - cpumask_t mask; > + static DEFINE_MUTEX(mask_lock); > + static cpumask_t mask; > unsigned int i; > int err; > > @@ -651,6 +652,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, > > purge_dups(cpu_list, tag->num_records); > > + mutex_lock(&mask_lock); > + > cpumask_clear(&mask); > for (i = 0; i < tag->num_records; i++) { > if (cpu_list[i] == CPU_SENTINEL) > @@ -665,6 +668,8 @@ static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, > else > err = dr_cpu_unconfigure(dp, cp, req_num, &mask); > > + mutex_unlock(&mask_lock); > + > if (err) > dr_cpu_send_error(dp, cp, data); > } > > How does it sound to you? This introduces too much complexity to solve a potential stack issue. If an improvement is required, then we need a simpler solution. > > > kernel/leon_kernel.c: cpumask_t mask; > > It's in irqchip::irq_set_affinity(), which is in atomic context(raw spinlock(s) held), > so dynamic allocation is not a good idea. > > My proposal(*untested*) is somewhat complicated for it introduces a new helper. 
> > diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c > index 4c61da491fee..6eced7acb8bc 100644 > --- a/arch/sparc/kernel/leon_kernel.c > +++ b/arch/sparc/kernel/leon_kernel.c > @@ -104,15 +104,25 @@ unsigned long leon_get_irqmask(unsigned int irq) > } > > #ifdef CONFIG_SMP > + > +static bool cpumask_include(const struct cpumask *srcp1, const struct cpumask *srcp2) > +{ > + unsigned int cpu; > + > + for_each_cpu(cpu, srcp2) { > + if (!cpumask_test_cpu(cpu, srcp1)) > + return false; > + } > + > + return true; > +} > + > static int irq_choose_cpu(const struct cpumask *affinity) > { > - cpumask_t mask; > + unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask); > > - cpumask_and(&mask, cpu_online_mask, affinity); > - if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask)) > - return boot_cpu_id; > - else > - return cpumask_first(&mask); > + return cpumask_include(affinity, cpu_online_mask) || cpu >= nr_cpu_ids ? > + boot_cpu_id : cpu; > } I think something like the following should do the trick. if (cpumask_equal(affinity, cpu_online_mask)) return boot_cpu_id; cpuid = cpumask_first_and(affinity, cpu_online_mask); if (cpuid < nr_cpu_ids) return cpuid; else return boot_cpu_id; If the passed affinity equals the set of online CPUs, then use the boot cpu. Else, use the first online cpu in the affinity mask. If none is found, use the boot cpu. > #else > #define irq_choose_cpu(affinity) boot_cpu_id > > Is it OK? > > [cc Yury for bitmap API] > > > kernel/leon_smp.c:static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > kernel/sun4d_smp.c:static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, Looks simple: just pass a pointer instead of passing the mask by value. > > Actually I am awared of existence of (at least some of) them, but so far I > have not found a _proper_ way of dealing with them(especially for case of > ds.c). > > Please lemme dig into it. Looking forward to the next iteration. Sam
© 2016 - 2024 Red Hat, Inc.