From: Ming Lei <ming.lei@redhat.com>
blk_mq_pci_map_queues and blk_mq_virtio_map_queues create a CPU to
hardware queue mapping based on affinity information. These two
functions share code which differs only in how the affinity information
is retrieved. The hisi_sas driver also open-codes the same loop.
Thus introduce a new helper function for creating these mappings which
takes a callback function for fetching the affinity mask. Also
introduce common helper functions for PCI and virtio devices to retrieve
affinity masks.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
[dwagner: - removed fallback mapping
- added affinity helpers (moved to pci/virtio)
- updated commit message]
Signed-off-by: Daniel Wagner <dwagner@suse.de>
---
block/blk-mq-cpumap.c | 35 +++++++++++++++++++++++++++++++++++
drivers/pci/pci.c | 20 ++++++++++++++++++++
drivers/virtio/virtio.c | 31 +++++++++++++++++++++++++++++++
include/linux/blk-mq.h | 5 +++++
include/linux/pci.h | 11 +++++++++++
include/linux/virtio.h | 13 +++++++++++++
6 files changed, 115 insertions(+)
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 9638b25fd521..c4993c0f822e 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -54,3 +54,38 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
return NUMA_NO_NODE;
}
+
+/**
+ * blk_mq_hctx_map_queues - Create CPU to hardware queue mapping
+ * @qmap: CPU to hardware queue map.
+ * @dev_data: Device data passed to get_queue_affinity().
+ * @dev_off: Device offset passed to get_queue_affinity() together with
+ * the queue index.
+ * @get_queue_affinity: Callback to retrieve queue affinity.
+ *
+ * Create a CPU to hardware queue mapping in @qmap. For each of the
+ * @qmap->nr_queues queues, @get_queue_affinity is called to retrieve the
+ * affinity mask of that queue, and every CPU in the mask is mapped to
+ * @qmap->queue_offset plus the queue index.
+ *
+ * If @get_queue_affinity returns NULL for any queue, the loop stops,
+ * WARN_ON_ONCE() fires when @qmap has more than one queue, and
+ * blk_mq_clear_mq_map() is called on @qmap.
+ */
+void blk_mq_hctx_map_queues(struct blk_mq_queue_map *qmap,
+ void *dev_data, int dev_off,
+ get_queue_affinty_fn *get_queue_affinity)
+{
+ const struct cpumask *mask;
+ unsigned int queue, cpu;
+
+ for (queue = 0; queue < qmap->nr_queues; queue++) {
+ mask = get_queue_affinity(dev_data, dev_off, queue);
+ if (!mask)
+ goto fallback;
+
+ for_each_cpu(cpu, mask)
+ qmap->mq_map[cpu] = qmap->queue_offset + queue;
+ }
+
+ return;
+
+fallback:
+ WARN_ON_ONCE(qmap->nr_queues > 1);
+ blk_mq_clear_mq_map(qmap);
+}
+EXPORT_SYMBOL_GPL(blk_mq_hctx_map_queues);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e3a49f66982d..84f9c16b813b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6370,6 +6370,26 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode,
return 0;
}
+#ifdef CONFIG_BLK_MQ_PCI
+/**
+ * pci_get_blk_mq_affinity - get the affinity mask of a queue of a PCI device
+ * @dev_data: Pointer to struct pci_dev.
+ * @offset: Offset to use for the pci irq vector
+ * @queue: Queue index
+ *
+ * Look up the affinity mask for @queue by calling pci_irq_get_affinity()
+ * on vector @offset + @queue of the PCI device passed in @dev_data.
+ * It is usually used as callback for blk_mq_hctx_map_queues().
+ *
+ * Return: the value returned by pci_irq_get_affinity() for that vector.
+ */
+const struct cpumask *pci_get_blk_mq_affinity(void *dev_data, int offset,
+ int queue)
+{
+ struct pci_dev *pdev = dev_data;
+
+ return pci_irq_get_affinity(pdev, offset + queue);
+}
+EXPORT_SYMBOL_GPL(pci_get_blk_mq_affinity);
+#endif /* CONFIG_BLK_MQ_PCI */
+
#ifdef CONFIG_ACPI
bool pci_pr3_present(struct pci_dev *pdev)
{
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index a9b93e99c23a..21667309ca9a 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -592,6 +592,37 @@ int virtio_device_restore(struct virtio_device *dev)
EXPORT_SYMBOL_GPL(virtio_device_restore);
#endif
+const struct cpumask *virtio_get_vq_affinity(struct virtio_device *dev,
+ int index)
+{
+ if (!dev->config->get_vq_affinity) /* config op is not set: no mask to report */
+ return NULL;
+
+ return dev->config->get_vq_affinity(dev, index); /* forward to the device's config op */
+}
+EXPORT_SYMBOL_GPL(virtio_get_vq_affinity);
+
+#ifdef CONFIG_BLK_MQ_VIRTIO
+/**
+ * virtio_get_blk_mq_affinity - get the affinity mask of a queue of a
+ * virtio device
+ * @dev_data: Pointer to struct virtio_device.
+ * @offset: Offset added to @queue to form the virtqueue index
+ * @queue: Queue index
+ *
+ * Look up the affinity mask for @queue by calling virtio_get_vq_affinity()
+ * with index @offset + @queue on the virtio device passed in @dev_data.
+ * It is usually used as callback for blk_mq_hctx_map_queues().
+ *
+ * Return: the value returned by virtio_get_vq_affinity() for that index.
+ */
+const struct cpumask *virtio_get_blk_mq_affinity(void *dev_data,
+ int offset, int queue)
+{
+ struct virtio_device *vdev = dev_data;
+
+ return virtio_get_vq_affinity(vdev, offset + queue);
+}
+EXPORT_SYMBOL_GPL(virtio_get_blk_mq_affinity);
+#endif /* CONFIG_BLK_MQ_VIRTIO */
+
static int virtio_init(void)
{
if (bus_register(&virtio_bus) != 0)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8d304b1d16b1..b9881a8794af 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -929,7 +929,12 @@ void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
+typedef const struct cpumask *(get_queue_affinty_fn)(void *dev_data,
+ int dev_off, int queue_idx); /* NOTE(review): "affinty" is likely a typo of "affinity" */
void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
+void blk_mq_hctx_map_queues(struct blk_mq_queue_map *qmap,
+ void *dev_data, int dev_off,
+ get_queue_affinty_fn *get_queue_affinity);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
void blk_mq_quiesce_queue_nowait(struct request_queue *q);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4cf89a4b4cbc..97f4797b5060 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1633,6 +1633,17 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus,
int pci_set_vga_state(struct pci_dev *pdev, bool decode,
unsigned int command_bits, u32 flags);
+#ifdef CONFIG_BLK_MQ_PCI
+const struct cpumask *pci_get_blk_mq_affinity(void *dev_data,
+ int offset, int queue);
+#else /* !CONFIG_BLK_MQ_PCI */
+static inline const struct cpumask *pci_get_blk_mq_affinity(void *dev_data,
+ int offset, int queue)
+{
+ return cpu_possible_mask; /* stub: ignores all arguments */
+}
+#endif /* CONFIG_BLK_MQ_PCI */
+
/*
* Virtual interrupts allow for more interrupts to be allocated
* than the device has interrupts for. These are not programmed
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index ecc5cb7b8c91..49d4f7353e5c 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -170,6 +170,19 @@ int virtio_device_restore(struct virtio_device *dev);
void virtio_reset_device(struct virtio_device *dev);
size_t virtio_max_dma_size(const struct virtio_device *vdev);
+const struct cpumask *virtio_get_vq_affinity(struct virtio_device *dev,
+ int index);
+
+#ifdef CONFIG_BLK_MQ_VIRTIO
+const struct cpumask *virtio_get_blk_mq_affinity(void *dev_data,
+ int offset, int queue);
+#else /* !CONFIG_BLK_MQ_VIRTIO */
+static inline const struct cpumask *virtio_get_blk_mq_affinity(void *dev_data,
+ int offset, int queue)
+{
+ return cpu_possible_mask; /* stub: ignores all arguments */
+}
+#endif /* CONFIG_BLK_MQ_VIRTIO */
#define virtio_device_for_each_vq(vdev, vq) \
list_for_each_entry(vq, &vdev->vqs, list)
--
2.46.0
On Fri, Sep 13, 2024 at 09:41:59AM +0200, Daniel Wagner wrote: > From: Ming Lei <ming.lei@redhat.com> > > blk_mq_pci_map_queues and blk_mq_virtio_map_queues will create a CPU to > hardware queue mapping based on affinity information. These two > function share code which only differs on how the affinity information > is retrieved. Also there is the hisi_sas which open codes the same loop. > > Thus introduce a new helper function for creating these mappings which > takes an callback function for fetching the affinity mask. Also > introduce common helper function for PCI and virtio devices to retrieve > affinity masks. > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index e3a49f66982d..84f9c16b813b 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -6370,6 +6370,26 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode, > return 0; > } > > +#ifdef CONFIG_BLK_MQ_PCI > +/** > + * pci_get_blk_mq_affinity - get affinity mask queue mapping for PCI device > + * @dev_data: Pointer to struct pci_dev. > + * @offset: Offset to use for the pci irq vector > + * @queue: Queue index > + * > + * This function returns for a queue the affinity mask for a PCI device. > + * It is usually used as callback for blk_mq_hctx_map_queues(). > + */ > +const struct cpumask *pci_get_blk_mq_affinity(void *dev_data, int offset, > + int queue) > +{ > + struct pci_dev *pdev = dev_data; > + > + return pci_irq_get_affinity(pdev, offset + queue); > +} > +EXPORT_SYMBOL_GPL(pci_get_blk_mq_affinity); > +#endif IMO this doesn't really fit well in drivers/pci since it doesn't add any PCI-specific knowledge or require any PCI core internals, and the parameters are blk-specific. I don't object to the code, but it seems like it could go somewhere in block/? Bjorn
On Fri, Sep 13, 2024 at 11:26:54AM -0500, Bjorn Helgaas wrote: > > +const struct cpumask *pci_get_blk_mq_affinity(void *dev_data, int offset, > > + int queue) > > +{ > > + struct pci_dev *pdev = dev_data; > > + > > + return pci_irq_get_affinity(pdev, offset + queue); > > +} > > +EXPORT_SYMBOL_GPL(pci_get_blk_mq_affinity); > > +#endif > > IMO this doesn't really fit well in drivers/pci since it doesn't add > any PCI-specific knowledge or require any PCI core internals, and the > parameters are blk-specific. I don't object to the code, but it seems > like it could go somewhere in block/? That's where it, or rather the current equivalent, lives, which is a bit silly. That being said, I suspect the nicest thing would be to offer a real irq_get_affinity interface at the bus level, e.g. add something like: const struct cpumask *(*irq_get_affinity)(struct device *dev, unsigned int irq_vec); to struct bus_type so that any layer can just query the irq affinity for buses that support it without extra glue code.
On 9/13/24 10:26 AM, Bjorn Helgaas wrote: > On Fri, Sep 13, 2024 at 09:41:59AM +0200, Daniel Wagner wrote: >> From: Ming Lei <ming.lei@redhat.com> >> >> blk_mq_pci_map_queues and blk_mq_virtio_map_queues will create a CPU to >> hardware queue mapping based on affinity information. These two >> function share code which only differs on how the affinity information >> is retrieved. Also there is the hisi_sas which open codes the same loop. >> >> Thus introduce a new helper function for creating these mappings which >> takes an callback function for fetching the affinity mask. Also >> introduce common helper function for PCI and virtio devices to retrieve >> affinity masks. > >> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c >> index e3a49f66982d..84f9c16b813b 100644 >> --- a/drivers/pci/pci.c >> +++ b/drivers/pci/pci.c >> @@ -6370,6 +6370,26 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode, >> return 0; >> } >> >> +#ifdef CONFIG_BLK_MQ_PCI >> +/** >> + * pci_get_blk_mq_affinity - get affinity mask queue mapping for PCI device >> + * @dev_data: Pointer to struct pci_dev. >> + * @offset: Offset to use for the pci irq vector >> + * @queue: Queue index >> + * >> + * This function returns for a queue the affinity mask for a PCI device. >> + * It is usually used as callback for blk_mq_hctx_map_queues(). >> + */ >> +const struct cpumask *pci_get_blk_mq_affinity(void *dev_data, int offset, >> + int queue) >> +{ >> + struct pci_dev *pdev = dev_data; >> + >> + return pci_irq_get_affinity(pdev, offset + queue); >> +} >> +EXPORT_SYMBOL_GPL(pci_get_blk_mq_affinity); >> +#endif > > IMO this doesn't really fit well in drivers/pci since it doesn't add > any PCI-specific knowledge or require any PCI core internals, and the > parameters are blk-specific. I don't object to the code, but it seems > like it could go somewhere in block/? Probably not a bad idea. Unrelated to that topic, but Daniel, all your email gets marked as spam. 
I didn't see your series before this reply. This has been common recently for people that haven't kept up with kernel.org changes, please check for smtp changes there. -- Jens Axboe
On Sun, Sep 15, 2024 at 02:32:30PM GMT, Jens Axboe wrote: > > IMO this doesn't really fit well in drivers/pci since it doesn't add > > any PCI-specific knowledge or require any PCI core internals, and the > > parameters are blk-specific. I don't object to the code, but it seems > > like it could go somewhere in block/? > > Probably not a bad idea. Christoph suggested moving these functions to the matching subsystems. I am fine either way. > Unrelated to that topic, but Daniel, all your email gets marked as spam. > I didn't see your series before this reply. This has been common > recently for people that haven't kept up with kernel.org changes, please > check for smtp changes there. Thanks for letting me know. FWIW, I switched over to the kernel.org SMTP server and I must be missing some important config option.
© 2016 - 2024 Red Hat, Inc.