Extend the capabilities of the generic CPU to hardware queue (hctx)
mapping code, so it maps housekeeping CPUs and isolated CPUs to the
hardware queues evenly.

A hctx is only operational when there is at least one online
housekeeping CPU assigned (aka active_hctx). Thus, check in the final
mapping that there is no hctx which has only offline housekeeping CPUs
and online isolated CPUs.
Example mapping result:
16 online CPUs
isolcpus=io_queue,2-3,6-7,12-13
Queue mapping:
hctx0: default 0 2
hctx1: default 1 3
hctx2: default 4 6
hctx3: default 5 7
hctx4: default 8 12
hctx5: default 9 13
hctx6: default 10
hctx7: default 11
hctx8: default 14
hctx9: default 15
IRQ mapping:
irq 42 affinity 0 effective 0 nvme0q0
irq 43 affinity 0 effective 0 nvme0q1
irq 44 affinity 1 effective 1 nvme0q2
irq 45 affinity 4 effective 4 nvme0q3
irq 46 affinity 5 effective 5 nvme0q4
irq 47 affinity 8 effective 8 nvme0q5
irq 48 affinity 9 effective 9 nvme0q6
irq 49 affinity 10 effective 10 nvme0q7
irq 50 affinity 11 effective 11 nvme0q8
irq 51 affinity 14 effective 14 nvme0q9
irq 52 affinity 15 effective 15 nvme0q10
A corner case is when the number of online CPUs and possible CPUs
differ and the driver asks for fewer queues than online CPUs, e.g.
8 online CPUs, 16 possible CPUs
isolcpus=io_queue,2-3,6-7,12-13
virtio_blk.num_request_queues=2
Queue mapping:
hctx0: default 0 1 2 3 4 5 6 7 8 12 13
hctx1: default 9 10 11 14 15
IRQ mapping:
irq 27 affinity 0 effective 0 virtio0-config
irq 28 affinity 0-1,4-5,8 effective 5 virtio0-req.0
irq 29 affinity 9-11,14-15 effective 0 virtio0-req.1
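Here only the first hctx ends up with an online housekeeping CPU
(assuming CPUs 0-7 are the online ones, that is CPUs 0, 1, 4 and 5;
CPU 8 is offline), so all isolated CPUs are folded onto hctx0. hctx1 is
left with only offline housekeeping CPUs and is simply not used until
one of them comes online.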
Signed-off-by: Daniel Wagner <wagi@kernel.org>
---
block/blk-mq-cpumap.c | 194 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 191 insertions(+), 3 deletions(-)
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 8244ecf878358c0b8de84458dcd5100c2f360213..4cb2724a78e13216e50f0e6b1a18f19ea41a54f8 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -17,12 +17,25 @@
#include "blk.h"
#include "blk-mq.h"
+static struct cpumask blk_hk_online_mask;
+
static unsigned int blk_mq_num_queues(const struct cpumask *mask,
unsigned int max_queues)
{
unsigned int num;
- num = cpumask_weight(mask);
+ if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
+ const struct cpumask *hk_mask;
+ struct cpumask avail_mask;
+
+ hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+ cpumask_and(&avail_mask, mask, hk_mask);
+
+ num = cpumask_weight(&avail_mask);
+ } else {
+ num = cpumask_weight(mask);
+ }
+
return min_not_zero(num, max_queues);
}
@@ -31,9 +44,13 @@ static unsigned int blk_mq_num_queues(const struct cpumask *mask,
*
* Returns an affinity mask that represents the queue-to-CPU mapping
* requested by the block layer based on possible CPUs.
+ * This helper takes isolcpus settings into account.
*/
const struct cpumask *blk_mq_possible_queue_affinity(void)
{
+ if (housekeeping_enabled(HK_TYPE_IO_QUEUE))
+ return housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+
return cpu_possible_mask;
}
EXPORT_SYMBOL_GPL(blk_mq_possible_queue_affinity);
@@ -46,6 +63,12 @@ EXPORT_SYMBOL_GPL(blk_mq_possible_queue_affinity);
*/
const struct cpumask *blk_mq_online_queue_affinity(void)
{
+ if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
+ cpumask_and(&blk_hk_online_mask, cpu_online_mask,
+ housekeeping_cpumask(HK_TYPE_IO_QUEUE));
+ return &blk_hk_online_mask;
+ }
+
return cpu_online_mask;
}
EXPORT_SYMBOL_GPL(blk_mq_online_queue_affinity);
@@ -57,7 +80,8 @@ EXPORT_SYMBOL_GPL(blk_mq_online_queue_affinity);
* ignored.
*
* Calculates the number of queues to be used for a multiqueue
- * device based on the number of possible CPUs.
+ * device based on the number of possible CPUs. This helper
+ * takes isolcpus settings into account.
*/
unsigned int blk_mq_num_possible_queues(unsigned int max_queues)
{
@@ -72,7 +96,8 @@ EXPORT_SYMBOL_GPL(blk_mq_num_possible_queues);
* ignored.
*
* Calculates the number of queues to be used for a multiqueue
- * device based on the number of online CPUs.
+ * device based on the number of online CPUs. This helper
+ * takes isolcpus settings into account.
*/
unsigned int blk_mq_num_online_queues(unsigned int max_queues)
{
@@ -80,11 +105,169 @@ unsigned int blk_mq_num_online_queues(unsigned int max_queues)
}
EXPORT_SYMBOL_GPL(blk_mq_num_online_queues);
+static bool blk_mq_hk_validate(struct blk_mq_queue_map *qmap,
+ const struct cpumask *active_hctx)
+{
+ /*
+ * Verify if the mapping is usable.
+ *
+ * First, mark all hctx which have at least one online housekeeping
+ * CPU assigned.
+ */
+ for (int queue = 0; queue < qmap->nr_queues; queue++) {
+ int cpu;
+
+ if (cpumask_test_cpu(queue, active_hctx)) {
+ /*
+ * This hctx has at least one online housekeeping
+ * CPU thus it is able to serve any assigned
+ * isolated CPU.
+ */
+ continue;
+ }
+
+ /*
+ * There is no online housekeeping CPU for this hctx, all
+ * good as long as all isolated CPUs are also offline.
+ */
+ for_each_online_cpu(cpu) {
+ if (qmap->mq_map[cpu] != queue)
+ continue;
+
+ pr_warn("Unable to create a usable CPU-to-queue mapping with the given constraints\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * blk_mq_map_hk_queues - Create housekeeping CPU to
+ * hardware queue mapping
+ * @qmap: CPU to hardware queue map
+ *
+ * Create a housekeeping CPU to hardware queue mapping in @qmap. @qmap
+ * contains a valid configuration honoring the isolcpus configuration.
+ */
+static void blk_mq_map_hk_queues(struct blk_mq_queue_map *qmap)
+{
+ cpumask_var_t active_hctx __free(free_cpumask_var) = NULL;
+ struct cpumask *hk_masks __free(kfree) = NULL;
+ const struct cpumask *mask;
+ unsigned int queue, cpu, nr_masks;
+
+ if (housekeeping_enabled(HK_TYPE_IO_QUEUE))
+ mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+ else
+ goto fallback;
+
+ if (!zalloc_cpumask_var(&active_hctx, GFP_KERNEL))
+ goto fallback;
+
+ /* Map housekeeping CPUs to a hctx */
+ hk_masks = group_mask_cpus_evenly(qmap->nr_queues, mask, &nr_masks);
+ if (!hk_masks)
+ goto fallback;
+
+ for (queue = 0; queue < qmap->nr_queues; queue++) {
+ unsigned int idx = (qmap->queue_offset + queue) % nr_masks;
+
+ for_each_cpu(cpu, &hk_masks[idx]) {
+ qmap->mq_map[cpu] = idx;
+
+ if (cpu_online(cpu))
+ cpumask_set_cpu(qmap->mq_map[cpu], active_hctx);
+ }
+ }
+
+ /* Map isolcpus to hardware context */
+ queue = cpumask_first(active_hctx);
+ for_each_cpu_andnot(cpu, cpu_possible_mask, mask) {
+ qmap->mq_map[cpu] = (qmap->queue_offset + queue) % nr_masks;
+ queue = cpumask_next_wrap(queue, active_hctx);
+ }
+
+ if (!blk_mq_hk_validate(qmap, active_hctx))
+ goto fallback;
+
+ return;
+
+fallback:
+ /*
+ * Map all CPUs to the first hctx to ensure at least one online
+ * housekeeping CPU is serving it.
+ */
+ for_each_possible_cpu(cpu)
+ qmap->mq_map[cpu] = 0;
+}
+
+/*
+ * blk_mq_map_hk_irq_queues - Create housekeeping CPU to
+ * hardware queue mapping
+ * @dev: The device to map queues
+ * @qmap: CPU to hardware queue map
+ * @offset: Queue offset to use for the device
+ *
+ * Create a housekeeping CPU to hardware queue mapping in @qmap. @qmap
+ * contains a valid configuration honoring the isolcpus configuration.
+ */
+static void blk_mq_map_hk_irq_queues(struct device *dev,
+ struct blk_mq_queue_map *qmap,
+ int offset)
+{
+ cpumask_var_t active_hctx __free(free_cpumask_var) = NULL;
+ cpumask_var_t mask __free(free_cpumask_var) = NULL;
+ unsigned int queue, cpu;
+
+ if (!zalloc_cpumask_var(&active_hctx, GFP_KERNEL))
+ goto fallback;
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ goto fallback;
+
+ /* Map housekeeping CPUs to a hctx */
+ for (queue = 0; queue < qmap->nr_queues; queue++) {
+ for_each_cpu(cpu, dev->bus->irq_get_affinity(dev, offset + queue)) {
+ qmap->mq_map[cpu] = qmap->queue_offset + queue;
+
+ cpumask_set_cpu(cpu, mask);
+ if (cpu_online(cpu))
+ cpumask_set_cpu(qmap->mq_map[cpu], active_hctx);
+ }
+ }
+
+ /* Map isolcpus to hardware context */
+ queue = cpumask_first(active_hctx);
+ for_each_cpu_andnot(cpu, cpu_possible_mask, mask) {
+ qmap->mq_map[cpu] = qmap->queue_offset + queue;
+ queue = cpumask_next_wrap(queue, active_hctx);
+ }
+
+ if (!blk_mq_hk_validate(qmap, active_hctx))
+ goto fallback;
+
+ return;
+
+fallback:
+ /*
+ * Map all CPUs to the first hctx to ensure at least one online
+ * housekeeping CPU is serving it.
+ */
+ for_each_possible_cpu(cpu)
+ qmap->mq_map[cpu] = 0;
+}
+
void blk_mq_map_queues(struct blk_mq_queue_map *qmap)
{
const struct cpumask *masks;
unsigned int queue, cpu, nr_masks;
+ if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
+ blk_mq_map_hk_queues(qmap);
+ return;
+ }
+
masks = group_cpus_evenly(qmap->nr_queues, &nr_masks);
if (!masks) {
for_each_possible_cpu(cpu)
@@ -139,6 +322,11 @@ void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap,
if (!dev->bus->irq_get_affinity)
goto fallback;
+ if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
+ blk_mq_map_hk_irq_queues(dev, qmap, offset);
+ return;
+ }
+
for (queue = 0; queue < qmap->nr_queues; queue++) {
mask = dev->bus->irq_get_affinity(dev, queue + offset);
if (!mask)
--
2.50.0
Hi Daniel,
kernel test robot noticed the following build errors:
[auto build test ERROR on 32f85e8468ce081d8e73ca3f0d588f1004013037]
url: https://github.com/intel-lab-lkp/linux/commits/Daniel-Wagner/lib-group_cpus-Add-group_masks_cpus_evenly/20250703-003811
base: 32f85e8468ce081d8e73ca3f0d588f1004013037
patch link: https://lore.kernel.org/r/20250702-isolcpus-io-queues-v7-8-557aa7eacce4%40kernel.org
patch subject: [PATCH v7 08/10] blk-mq: use hk cpus only when isolcpus=io_queue is enabled
config: arm-allnoconfig (https://download.01.org/0day-ci/archive/20250703/202507032238.AoTmQnGP-lkp@intel.com/config)
compiler: clang version 21.0.0git (https://github.com/llvm/llvm-project f1a4bb62452d88a0edd9340b3ca7c9b11ad9193f)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250703/202507032238.AoTmQnGP-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507032238.AoTmQnGP-lkp@intel.com/
All errors (new ones prefixed by >>):
>> block/blk-mq-cpumap.c:155:16: error: array initializer must be an initializer list
155 | cpumask_var_t active_hctx __free(free_cpumask_var) = NULL;
| ^
block/blk-mq-cpumap.c:219:16: error: array initializer must be an initializer list
219 | cpumask_var_t active_hctx __free(free_cpumask_var) = NULL;
| ^
block/blk-mq-cpumap.c:220:16: error: array initializer must be an initializer list
220 | cpumask_var_t mask __free(free_cpumask_var) = NULL;
| ^
3 errors generated.
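For context, the errors are consistent with how cpumask_var_t is defined
when CONFIG_CPUMASK_OFFSTACK is disabled (which is the case for
arm-allnoconfig): the type is then an array, and an array cannot be
initialized with NULL. Roughly, from include/linux/cpumask.h:

#ifdef CONFIG_CPUMASK_OFFSTACK
typedef struct cpumask *cpumask_var_t;   /* pointer: "= NULL" is valid */
#else
typedef struct cpumask cpumask_var_t[1]; /* array: "= NULL" does not compile */
#endif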
vim +155 block/blk-mq-cpumap.c
144
145 /*
146 * blk_mq_map_hk_queues - Create housekeeping CPU to
147 * hardware queue mapping
148 * @qmap: CPU to hardware queue map
149 *
150 * Create a housekeeping CPU to hardware queue mapping in @qmap. @qmap
151 * contains a valid configuration honoring the isolcpus configuration.
152 */
153 static void blk_mq_map_hk_queues(struct blk_mq_queue_map *qmap)
154 {
> 155 cpumask_var_t active_hctx __free(free_cpumask_var) = NULL;
156 struct cpumask *hk_masks __free(kfree) = NULL;
157 const struct cpumask *mask;
158 unsigned int queue, cpu, nr_masks;
159
160 if (housekeeping_enabled(HK_TYPE_IO_QUEUE))
161 mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
162 else
163 goto fallback;
164
165 if (!zalloc_cpumask_var(&active_hctx, GFP_KERNEL))
166 goto fallback;
167
168 /* Map housekeeping CPUs to a hctx */
169 hk_masks = group_mask_cpus_evenly(qmap->nr_queues, mask, &nr_masks);
170 if (!hk_masks)
171 goto fallback;
172
173 for (queue = 0; queue < qmap->nr_queues; queue++) {
174 unsigned int idx = (qmap->queue_offset + queue) % nr_masks;
175
176 for_each_cpu(cpu, &hk_masks[idx]) {
177 qmap->mq_map[cpu] = idx;
178
179 if (cpu_online(cpu))
180 cpumask_set_cpu(qmap->mq_map[cpu], active_hctx);
181 }
182 }
183
184 /* Map isolcpus to hardware context */
185 queue = cpumask_first(active_hctx);
186 for_each_cpu_andnot(cpu, cpu_possible_mask, mask) {
187 qmap->mq_map[cpu] = (qmap->queue_offset + queue) % nr_masks;
188 queue = cpumask_next_wrap(queue, active_hctx);
189 }
190
191 if (!blk_mq_hk_validate(qmap, active_hctx))
192 goto fallback;
193
194 return;
195
196 fallback:
197 /*
198 * Map all CPUs to the first hctx to ensure at least one online
199 * housekeeping CPU is serving it.
200 */
201 for_each_possible_cpu(cpu)
202 qmap->mq_map[cpu] = 0;
203 }
204
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Wed, Jul 02, 2025 at 06:33:58PM +0200, Daniel Wagner wrote:
> const struct cpumask *blk_mq_possible_queue_affinity(void)
> {
> + if (housekeeping_enabled(HK_TYPE_IO_QUEUE))
> + return housekeeping_cpumask(HK_TYPE_IO_QUEUE);
> +
> return cpu_possible_mask;
> }
I'm no expert on the housekeeping stuff, but why isn't the
housekeeping_enabled check done in housekeeping_cpumask directly so
that the drivers could use housekeeping_cpumask without a blk-mq
wrapper?
On Thu, Jul 03, 2025 at 11:01:58AM +0200, Christoph Hellwig wrote:
> On Wed, Jul 02, 2025 at 06:33:58PM +0200, Daniel Wagner wrote:
> > const struct cpumask *blk_mq_possible_queue_affinity(void)
> > {
> > + if (housekeeping_enabled(HK_TYPE_IO_QUEUE))
> > + return housekeeping_cpumask(HK_TYPE_IO_QUEUE);
> > +
> > return cpu_possible_mask;
> > }
>
> I'm no expert on the housekeeping stuff, but why isn't the
> housekeeping_enabled check done in housekeeping_cpumask directly so
> that the drivers could use housekeeping_cpumask without a blk-mq
> wrapper?
Yes, housekeeping_cpumask will return cpu_possible_mask when housekeeping
is disabled. Though some drivers want cpu_online_mask instead. If all
drivers would agree on one version of the mask it should be possible to
drop these helpers (maybe the housekeeping API needs to be extended then,
though).
This is also what Hannes brought up. If the number of supported hardware
queues for a device is less than the number of possible CPUs, it really
makes sense to distribute the hardware queues only between the online
CPUs. I think the only two drivers which are interested in
cpu_possible_mask are nvme-pci and virtio.
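Just to illustrate what such an extension could look like; this is only a
sketch, the helper name and the caller-provided scratch mask are made up
and not an existing API:

#include <linux/cpumask.h>
#include <linux/sched/isolation.h>

/*
 * Hypothetical housekeeping helper: fold cpu_online_mask into the
 * housekeeping mask so callers that want the online variant would not
 * need a blk-mq specific wrapper. Name and scratch parameter are
 * illustrative only.
 */
static inline const struct cpumask *
housekeeping_online_cpumask(enum hk_type type, struct cpumask *scratch)
{
	if (!housekeeping_enabled(type))
		return cpu_online_mask;

	cpumask_and(scratch, cpu_online_mask, housekeeping_cpumask(type));
	return scratch;
}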
On Fri, Jul 04, 2025 at 11:00:56AM +0200, Daniel Wagner wrote:
> > I'm no expert on the housekeeping stuff, but why isn't the
> > housekeeping_enabled check done in housekeeping_cpumask directly so
> > that the drivers could use housekeeping_cpumask without a blk-mq
> > wrapper?
> Yes, housekeeping_cpumask will return cpu_possible_mask when housekeeping
> is disabled. Though some drivers want cpu_online_mask instead. If all
> drivers would agree on one version of the mask it should be possible to
> drop these helpers (maybe the housekeeping API needs to be extended then,
> though).
Drivers don't get cpu hotplug notifications, so cpu_possible_mask is the
only valid answer right now. That could change if we ever implement
notifications to the drivers.
> This is also what Hannes brought up. If the number of supported hardware
> queues for a device is less than the number of possible CPUs, it really
> makes sense to distribute the hardware queues only between the online
> CPUs. I think the only two drivers which are interested in
> cpu_possible_mask are nvme-pci and virtio.
That's the only two drivers that get it right :(
On 7/2/25 18:33, Daniel Wagner wrote:
> Extend the capabilities of the generic CPU to hardware queue (hctx)
> mapping code, so it maps housekeeping CPUs and isolated CPUs to the
> hardware queues evenly.
>
> A hctx is only operational when there is at least one online
> housekeeping CPU assigned (aka active_hctx). Thus, check the final
> mapping that there is no hctx which has only offline housekeeping CPU and
> online isolated CPUs.
>
> Example mapping result:
>
> 16 online CPUs
>
> isolcpus=io_queue,2-3,6-7,12-13
>
> Queue mapping:
> hctx0: default 0 2
> hctx1: default 1 3
> hctx2: default 4 6
> hctx3: default 5 7
> hctx4: default 8 12
> hctx5: default 9 13
> hctx6: default 10
> hctx7: default 11
> hctx8: default 14
> hctx9: default 15
>
> IRQ mapping:
> irq 42 affinity 0 effective 0 nvme0q0
> irq 43 affinity 0 effective 0 nvme0q1
> irq 44 affinity 1 effective 1 nvme0q2
> irq 45 affinity 4 effective 4 nvme0q3
> irq 46 affinity 5 effective 5 nvme0q4
> irq 47 affinity 8 effective 8 nvme0q5
> irq 48 affinity 9 effective 9 nvme0q6
> irq 49 affinity 10 effective 10 nvme0q7
> irq 50 affinity 11 effective 11 nvme0q8
> irq 51 affinity 14 effective 14 nvme0q9
> irq 52 affinity 15 effective 15 nvme0q10
>
> A corner case is when the number of online CPUs and present CPUs
> differ and the driver asks for less queues than online CPUs, e.g.
>
> 8 online CPUs, 16 possible CPUs
>
> isolcpus=io_queue,2-3,6-7,12-13
> virtio_blk.num_request_queues=2
>
> Queue mapping:
> hctx0: default 0 1 2 3 4 5 6 7 8 12 13
> hctx1: default 9 10 11 14 15
>
> IRQ mapping
> irq 27 affinity 0 effective 0 virtio0-config
> irq 28 affinity 0-1,4-5,8 effective 5 virtio0-req.0
> irq 29 affinity 9-11,14-15 effective 0 virtio0-req.1
>
> Signed-off-by: Daniel Wagner <wagi@kernel.org>
> ---
> block/blk-mq-cpumap.c | 194 +++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 191 insertions(+), 3 deletions(-)
>
> diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
> index 8244ecf878358c0b8de84458dcd5100c2f360213..4cb2724a78e13216e50f0e6b1a18f19ea41a54f8 100644
> --- a/block/blk-mq-cpumap.c
> +++ b/block/blk-mq-cpumap.c
> @@ -17,12 +17,25 @@
> #include "blk.h"
> #include "blk-mq.h"
>
> +static struct cpumask blk_hk_online_mask;
> +
> static unsigned int blk_mq_num_queues(const struct cpumask *mask,
> unsigned int max_queues)
> {
> unsigned int num;
>
> - num = cpumask_weight(mask);
> + if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
> + const struct cpumask *hk_mask;
> + struct cpumask avail_mask;
> +
> + hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
> + cpumask_and(&avail_mask, mask, hk_mask);
> +
> + num = cpumask_weight(&avail_mask);
> + } else {
> + num = cpumask_weight(mask);
> + }
> +
> return min_not_zero(num, max_queues);
> }
>
> @@ -31,9 +44,13 @@ static unsigned int blk_mq_num_queues(const struct cpumask *mask,
> *
> * Returns an affinity mask that represents the queue-to-CPU mapping
> * requested by the block layer based on possible CPUs.
> + * This helper takes isolcpus settings into account.
> */
> const struct cpumask *blk_mq_possible_queue_affinity(void)
> {
> + if (housekeeping_enabled(HK_TYPE_IO_QUEUE))
> + return housekeeping_cpumask(HK_TYPE_IO_QUEUE);
> +
> return cpu_possible_mask;
> }
> EXPORT_SYMBOL_GPL(blk_mq_possible_queue_affinity);
> @@ -46,6 +63,12 @@ EXPORT_SYMBOL_GPL(blk_mq_possible_queue_affinity);
> */
> const struct cpumask *blk_mq_online_queue_affinity(void)
> {
> + if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
> + cpumask_and(&blk_hk_online_mask, cpu_online_mask,
> + housekeeping_cpumask(HK_TYPE_IO_QUEUE));
> + return &blk_hk_online_mask;
> + }
> +
> return cpu_online_mask;
> }
> EXPORT_SYMBOL_GPL(blk_mq_online_queue_affinity);
> @@ -57,7 +80,8 @@ EXPORT_SYMBOL_GPL(blk_mq_online_queue_affinity);
> * ignored.
> *
> * Calculates the number of queues to be used for a multiqueue
> - * device based on the number of possible CPUs.
> + * device based on the number of possible CPUs. This helper
> + * takes isolcpus settings into account.
> */
> unsigned int blk_mq_num_possible_queues(unsigned int max_queues)
> {
> @@ -72,7 +96,8 @@ EXPORT_SYMBOL_GPL(blk_mq_num_possible_queues);
> * ignored.
> *
> * Calculates the number of queues to be used for a multiqueue
> - * device based on the number of online CPUs.
> + * device based on the number of online CPUs. This helper
> + * takes isolcpus settings into account.
> */
> unsigned int blk_mq_num_online_queues(unsigned int max_queues)
> {
> @@ -80,11 +105,169 @@ unsigned int blk_mq_num_online_queues(unsigned int max_queues)
> }
> EXPORT_SYMBOL_GPL(blk_mq_num_online_queues);
>
> +static bool blk_mq_hk_validate(struct blk_mq_queue_map *qmap,
> + const struct cpumask *active_hctx)
> +{
> + /*
> + * Verify if the mapping is usable.
> + *
> > + * First, mark all hctx which have at least online housekeeping
> + * CPU assigned.
> + */
> + for (int queue = 0; queue < qmap->nr_queues; queue++) {
> + int cpu;
> +
> + if (cpumask_test_cpu(queue, active_hctx)) {
> + /*
> > + * This hctx has at least one online housekeeping
> + * CPU thus it is able to serve any assigned
> + * isolated CPU.
> + */
> + continue;
> + }
> +
> + /*
> > + * There is no online housekeeping CPU for this hctx, all
> + * good as long as all isolated CPUs are also offline.
> + */
> + for_each_online_cpu(cpu) {
> + if (qmap->mq_map[cpu] != queue)
> + continue;
> +
> + pr_warn("Unable to create a usable CPU-to-queue mapping with the given constraints\n");
> + return false;
> + }
> + }
> +
> + return true;
> +}
> +
> +/*
> + * blk_mq_map_hk_queues - Create housekeeping CPU to
> + * hardware queue mapping
> + * @qmap: CPU to hardware queue map
> + *
> + * Create a housekeeping CPU to hardware queue mapping in @qmap. @qmap
> + * contains a valid configuration honoring the isolcpus configuration.
> + */
> +static void blk_mq_map_hk_queues(struct blk_mq_queue_map *qmap)
> +{
> + cpumask_var_t active_hctx __free(free_cpumask_var) = NULL;
> + struct cpumask *hk_masks __free(kfree) = NULL;
> + const struct cpumask *mask;
> + unsigned int queue, cpu, nr_masks;
> +
> + if (housekeeping_enabled(HK_TYPE_IO_QUEUE))
> + mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
> + else
> + goto fallback;
> +
> + if (!zalloc_cpumask_var(&active_hctx, GFP_KERNEL))
> + goto fallback;
> +
> + /* Map housekeeping CPUs to a hctx */
> + hk_masks = group_mask_cpus_evenly(qmap->nr_queues, mask, &nr_masks);
> + if (!hk_masks)
> + goto fallback;
> +
> + for (queue = 0; queue < qmap->nr_queues; queue++) {
> + unsigned int idx = (qmap->queue_offset + queue) % nr_masks;
> +
> + for_each_cpu(cpu, &hk_masks[idx]) {
> + qmap->mq_map[cpu] = idx;
> +
> + if (cpu_online(cpu))
> + cpumask_set_cpu(qmap->mq_map[cpu], active_hctx);
Why cpu_online? Up until this point it really didn't matter if the
affinity mask was set to 'online' or 'possible' cpus, but here you
require CPUs to be online...
> + }
> + }
> +
> + /* Map isolcpus to hardware context */
> + queue = cpumask_first(active_hctx);
> + for_each_cpu_andnot(cpu, cpu_possible_mask, mask) {
> + qmap->mq_map[cpu] = (qmap->queue_offset + queue) % nr_masks;
> + queue = cpumask_next_wrap(queue, active_hctx);
> + }
Really? Doesn't this map _all_ cpus, and not just the isolcpus?
> +
> + if (!blk_mq_hk_validate(qmap, active_hctx))
> + goto fallback;
> +
> + return;
> +
> +fallback:
> + /*
> + * Map all CPUs to the first hctx to ensure at least one online
> + * housekeeping CPU is serving it.
> + */
> + for_each_possible_cpu(cpu)
> + qmap->mq_map[cpu] = 0;
I think you need to map all hctx, no?
> +}
> +
> +/*
> + * blk_mq_map_hk_irq_queues - Create housekeeping CPU to
> + * hardware queue mapping
> + * @dev: The device to map queues
> + * @qmap: CPU to hardware queue map
> + * @offset: Queue offset to use for the device
> + *
> + * Create a housekeeping CPU to hardware queue mapping in @qmap. @qmap
> + * contains a valid configuration honoring the isolcpus configuration.
> + */
> +static void blk_mq_map_hk_irq_queues(struct device *dev,
> + struct blk_mq_queue_map *qmap,
> + int offset)
> +{
> + cpumask_var_t active_hctx __free(free_cpumask_var) = NULL;
> + cpumask_var_t mask __free(free_cpumask_var) = NULL;
> + unsigned int queue, cpu;
> +
> + if (!zalloc_cpumask_var(&active_hctx, GFP_KERNEL))
> + goto fallback;
> +
> + if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
> + goto fallback;
> +
> + /* Map housekeeping CPUs to a hctx */
> + for (queue = 0; queue < qmap->nr_queues; queue++) {
> + for_each_cpu(cpu, dev->bus->irq_get_affinity(dev, offset + queue)) {
> + qmap->mq_map[cpu] = qmap->queue_offset + queue;
> +
> + cpumask_set_cpu(cpu, mask);
> + if (cpu_online(cpu))
> + cpumask_set_cpu(qmap->mq_map[cpu], active_hctx);
Now that is really curious. You pick up the interrupt affinity from the
'bus', which, I assume, is the PCI bus. And this would imply that the
bus can (or already is) programmed for this interrupt affinity.
Which would imply that this is a usable interrupt affinity from the
hardware perspective, irrespective on whether the cpu is online or not.
So why the check to cpu_online()? Can't we simply take the existing
affinity and rely on the hardware to do the right thing?
> + }
> + }
> +
> + /* Map isolcpus to hardware context */
> + queue = cpumask_first(active_hctx);
> + for_each_cpu_andnot(cpu, cpu_possible_mask, mask) {
> + qmap->mq_map[cpu] = qmap->queue_offset + queue;
> + queue = cpumask_next_wrap(queue, active_hctx);
> + }
> +
> + if (!blk_mq_hk_validate(qmap, active_hctx))
> + goto fallback;
> +
> + return;
> +
> +fallback:
> + /*
> + * Map all CPUs to the first hctx to ensure at least one online
> + * housekeeping CPU is serving it.
> + */
> + for_each_possible_cpu(cpu)
> + qmap->mq_map[cpu] = 0;
Same comment as previously; don't we need to map all hctx?
> +}
> +
> void blk_mq_map_queues(struct blk_mq_queue_map *qmap)
> {
> const struct cpumask *masks;
> unsigned int queue, cpu, nr_masks;
>
> + if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
> + blk_mq_map_hk_queues(qmap);
> + return;
> + }
> +
> masks = group_cpus_evenly(qmap->nr_queues, &nr_masks);
> if (!masks) {
> for_each_possible_cpu(cpu)
> @@ -139,6 +322,11 @@ void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap,
> if (!dev->bus->irq_get_affinity)
> goto fallback;
>
> + if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
> + blk_mq_map_hk_irq_queues(dev, qmap, offset);
> + return;
> + }
> +
> for (queue = 0; queue < qmap->nr_queues; queue++) {
> mask = dev->bus->irq_get_affinity(dev, queue + offset);
> if (!mask)
>
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
On Thu, Jul 03, 2025 at 08:58:02AM +0200, Hannes Reinecke wrote:
> > + for (queue = 0; queue < qmap->nr_queues; queue++) {
> > + unsigned int idx = (qmap->queue_offset + queue) % nr_masks;
> > +
> > + for_each_cpu(cpu, &hk_masks[idx]) {
> > + qmap->mq_map[cpu] = idx;
> > +
> > + if (cpu_online(cpu))
> > + cpumask_set_cpu(qmap->mq_map[cpu], active_hctx);
>
> Why cpu_online? Up until this point it really didn't matter if the affinity
> mask was set to 'online' or 'possible' cpus, but here you
> require CPUs to be online...
This part here tracks whether a hardware context has at least one
housekeeping CPU online. It is possible to provide a configuration where
we end up with hardware contexts which have only offline housekeeping
CPUs and online isolcpus. active_hctx tracks which of the hardware
contexts are usable, which is used in the next step...
> > + }
> > + }
> > +
> > + /* Map isolcpus to hardware context */
> > + queue = cpumask_first(active_hctx);
> > + for_each_cpu_andnot(cpu, cpu_possible_mask, mask) {
> > + qmap->mq_map[cpu] = (qmap->queue_offset + queue) % nr_masks;
> > + queue = cpumask_next_wrap(queue, active_hctx);
> > + }
>
> Really? Doesn't this map _all_ cpus, and not just the isolcpus?
for_each_cpu_andnot iterates over all CPUs which are not housekeeping CPUs
(mask is the housekeeping mask), thus these are all isolated CPUs. Note
the 'andnot' part.
The cpumask_first/cpumask_next_wrap calls return only hardware contexts
which have at least one online housekeeping CPU. Yes, it is possible to
make this a bit smarter, so that we keep the grouping of the offline
CPUs intact, though I am not sure it is worth adding complexity for a
corner case, at least not yet.
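To make this concrete with the 16 CPU example from the commit message:
all ten hctx have an online housekeeping CPU, so active_hctx covers
hctx0-9. The andnot iteration visits the isolated CPUs 2, 3, 6, 7, 12, 13
in order and assigns them round-robin starting at the first active hctx:
CPU 2 -> hctx0, 3 -> hctx1, 6 -> hctx2, 7 -> hctx3, 12 -> hctx4,
13 -> hctx5. That is exactly why the queue mapping shows hctx0: 0 2,
hctx1: 1 3, hctx2: 4 6, and so on.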
> > +fallback:
> > + /*
> > + * Map all CPUs to the first hctx to ensure at least one online
> > + * housekeeping CPU is serving it.
> > + */
> > + for_each_possible_cpu(cpu)
> > + qmap->mq_map[cpu] = 0;
>
> I think you need to map all hctx, no?
The block layer filters out hctxs which have no CPU assigned when
selecting a queue. This is really a failsafe mode; it just makes
sure the system boots.
> > + /* Map housekeeping CPUs to a hctx */
> > + for (queue = 0; queue < qmap->nr_queues; queue++) {
> > + for_each_cpu(cpu, dev->bus->irq_get_affinity(dev, offset + queue)) {
> > + qmap->mq_map[cpu] = qmap->queue_offset + queue;
> > +
> > + cpumask_set_cpu(cpu, mask);
> > + if (cpu_online(cpu))
> > + cpumask_set_cpu(qmap->mq_map[cpu], active_hctx);
>
> Now that is really curious. You pick up the interrupt affinity from the
> 'bus', which, I assume, is the PCI bus. And this would imply that the
> bus can (or already is) programmed for this interrupt affinity.
Yes, this is the case. irq_create_affinity_masks uses
group_cpus_evenly/group_mask_cpus_evenly for the number of requested IRQs.
The number of IRQs can be higher than the number of requested queues
here. It's necessary to use the affinity masks created by
irq_create_affinity_masks as input.
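In the virtio corner case from the commit message that input is visible
in the IRQ mapping: virtio0-req.0 reports affinity 0-1,4-5,8 and
virtio0-req.1 reports 9-11,14-15, and blk_mq_map_hk_irq_queues starts
from exactly these masks instead of computing its own spread.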
> Which would imply that this is a usable interrupt affinity from the
> hardware perspective, irrespective on whether the cpu is online or
> not. So why the check to cpu_online()? Can't we simply take the existing affinity
> and rely on the hardware to do the right thing?
Again, this is tracking whether a hctx has an online housekeeping CPU.