Allow block driver to map and unmap a buffer for later I/O, as a performance
hint.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/block-backend.c          | 10 ++++++++++
 block/io.c                     | 24 ++++++++++++++++++++++++
 include/block/block.h          |  2 ++
 include/block/block_int.h      |  4 ++++
 include/sysemu/block-backend.h |  3 +++
5 files changed, 43 insertions(+)
diff --git a/block/block-backend.c b/block/block-backend.c
index 0df3457..784b936 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1974,3 +1974,13 @@ static void blk_root_drained_end(BdrvChild *child)
         }
     }
 }
+
+void blk_dma_map(BlockBackend *blk, void *host, size_t size)
+{
+    bdrv_dma_map(blk_bs(blk), host, size);
+}
+
+void blk_dma_unmap(BlockBackend *blk, void *host)
+{
+    bdrv_dma_unmap(blk_bs(blk), host);
+}
diff --git a/block/io.c b/block/io.c
index 2de7c77..988e4db 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2537,3 +2537,27 @@ void bdrv_io_unplug(BlockDriverState *bs)
         bdrv_io_unplug(child->bs);
     }
 }
+
+void bdrv_dma_map(BlockDriverState *bs, void *host, size_t size)
+{
+    BdrvChild *child;
+
+    if (bs->drv && bs->drv->bdrv_dma_map) {
+        bs->drv->bdrv_dma_map(bs, host, size);
+    }
+    QLIST_FOREACH(child, &bs->children, next) {
+        bdrv_dma_map(child->bs, host, size);
+    }
+}
+
+void bdrv_dma_unmap(BlockDriverState *bs, void *host)
+{
+    BdrvChild *child;
+
+    if (bs->drv && bs->drv->bdrv_dma_unmap) {
+        bs->drv->bdrv_dma_unmap(bs, host);
+    }
+    QLIST_FOREACH(child, &bs->children, next) {
+        bdrv_dma_unmap(child->bs, host);
+    }
+}
diff --git a/include/block/block.h b/include/block/block.h
index 4c149ad..f59b50a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -624,4 +624,6 @@ void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
                     Error **errp);
 void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
+void bdrv_dma_map(BlockDriverState *bs, void *host, size_t size);
+void bdrv_dma_unmap(BlockDriverState *bs, void *host);
 
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 15fa602..4092669 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -381,6 +381,10 @@ struct BlockDriver {
                             uint64_t parent_perm, uint64_t parent_shared,
                             uint64_t *nperm, uint64_t *nshared);
 
+    /* Map and unmap a buffer for I/O, as a performance hint to the
+     * driver. */
+    void (*bdrv_dma_map)(BlockDriverState *bs, void *host, size_t size);
+    void (*bdrv_dma_unmap)(BlockDriverState *bs, void *host);
 
     QLIST_ENTRY(BlockDriver) list;
 };
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 1e05281..5f7ccdb 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -239,4 +239,7 @@ void blk_io_limits_disable(BlockBackend *blk);
 void blk_io_limits_enable(BlockBackend *blk, const char *group);
 void blk_io_limits_update_group(BlockBackend *blk, const char *group);
+void blk_dma_map(BlockBackend *blk, void *host, size_t size);
+void blk_dma_unmap(BlockBackend *blk, void *host);
+
 
 #endif
--
2.9.4
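
For illustration, a caller of the new hint API would look something like
the sketch below. The benchmark harness is invented for this example;
only blk_dma_map()/blk_dma_unmap() come from the patch, while
blk_blockalign(), blk_pread() and qemu_vfree() are existing QEMU helpers.

/* Hypothetical caller: map a long-lived buffer once, reuse it for many
 * requests, and unmap it when done.  Without the hint, a driver such as
 * block/nvme.c has to set up and tear down an IOVA mapping per request. */
static void bench_with_mapped_buffer(BlockBackend *blk, int iterations)
{
    size_t buf_size = 1 * 1024 * 1024;
    void *buf = blk_blockalign(blk, buf_size);   /* request-aligned buffer */

    blk_dma_map(blk, buf, buf_size);             /* performance hint */
    for (int i = 0; i < iterations; i++) {
        blk_pread(blk, 0, buf, buf_size);        /* synchronous, offset 0 */
    }
    blk_dma_unmap(blk, buf);                     /* withdraw the hint */

    qemu_vfree(buf);
}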
On Wed, Jul 05, 2017 at 09:36:32PM +0800, Fam Zheng wrote:
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 15fa602..4092669 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -381,6 +381,10 @@ struct BlockDriver {
>                              uint64_t parent_perm, uint64_t parent_shared,
>                              uint64_t *nperm, uint64_t *nshared);
> 
> +    /* Map and unmap a buffer for I/O, as a performance hint to the
> +     * driver. */
> +    void (*bdrv_dma_map)(BlockDriverState *bs, void *host, size_t size);
> +    void (*bdrv_dma_unmap)(BlockDriverState *bs, void *host);
It's unclear what this API does and how to use it correctly. Please
flesh out the doc comments a bit to explain its use.
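
For example, the contract could be spelled out along these lines (a
sketch of possible wording, not final text):

/* bdrv_dma_map:
 * Hint that the caller will repeatedly perform I/O to/from the host
 * buffer [@host, @host + @size).  A driver may set up a persistent
 * mapping (e.g. a pinned HVA->IOVA mapping in a userspace NVMe driver)
 * to avoid per-request mapping costs.  The buffer must remain valid
 * until the matching bdrv_dma_unmap() call.  Drivers that cannot use
 * the hint leave the callback NULL; I/O stays correct either way, only
 * performance differs.
 *
 * bdrv_dma_unmap:
 * Withdraw the hint for @host and release whatever resources the
 * driver set up in bdrv_dma_map().
 */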
On Wed, Jul 05, 2017 at 09:36:32PM +0800, Fam Zheng wrote:
> Allow block driver to map and unmap a buffer for later I/O, as a performance
> hint.

The name blk_dma_map() is confusing since other "dma" APIs like
dma_addr_t and dma_blk_io() deal with guest physical addresses instead
of host addresses. They are about DMA to/from guest RAM.

Have you considered hiding this cached mapping in block/nvme.c so that
it isn't exposed? block/nvme.c could keep the last buffer mapped and
callers would get the performance benefit without a new blk_dma_map()
API.
On 10/07/2017 17:07, Stefan Hajnoczi wrote:
> On Wed, Jul 05, 2017 at 09:36:32PM +0800, Fam Zheng wrote:
>> Allow block driver to map and unmap a buffer for later I/O, as a performance
>> hint.
> The name blk_dma_map() is confusing since other "dma" APIs like
> dma_addr_t and dma_blk_io() deal with guest physical addresses instead
> of host addresses. They are about DMA to/from guest RAM.
>
> Have you considered hiding this cached mapping in block/nvme.c so that
> it isn't exposed? block/nvme.c could keep the last buffer mapped and
> callers would get the performance benefit without a new blk_dma_map()
> API.

One buffer is enough for qemu-img bench, but not for more complex cases
(e.g. fio).

Paolo
On Mon, Jul 10, 2017 at 05:08:56PM +0200, Paolo Bonzini wrote:
> On 10/07/2017 17:07, Stefan Hajnoczi wrote:
> > On Wed, Jul 05, 2017 at 09:36:32PM +0800, Fam Zheng wrote:
> >> Allow block driver to map and unmap a buffer for later I/O, as a performance
> >> hint.
> > The name blk_dma_map() is confusing since other "dma" APIs like
> > dma_addr_t and dma_blk_io() deal with guest physical addresses instead
> > of host addresses. They are about DMA to/from guest RAM.
> >
> > Have you considered hiding this cached mapping in block/nvme.c so that
> > it isn't exposed? block/nvme.c could keep the last buffer mapped and
> > callers would get the performance benefit without a new blk_dma_map()
> > API.
>
> One buffer is enough for qemu-img bench, but not for more complex cases
> (e.g. fio).

I don't see any other blk_dma_map() callers.

Stefan
On 11/07/2017 12:05, Stefan Hajnoczi wrote:
> On Mon, Jul 10, 2017 at 05:08:56PM +0200, Paolo Bonzini wrote:
>> On 10/07/2017 17:07, Stefan Hajnoczi wrote:
>>> On Wed, Jul 05, 2017 at 09:36:32PM +0800, Fam Zheng wrote:
>>>> Allow block driver to map and unmap a buffer for later I/O, as a performance
>>>> hint.
>>> The name blk_dma_map() is confusing since other "dma" APIs like
>>> dma_addr_t and dma_blk_io() deal with guest physical addresses instead
>>> of host addresses. They are about DMA to/from guest RAM.
>>>
>>> Have you considered hiding this cached mapping in block/nvme.c so that
>>> it isn't exposed? block/nvme.c could keep the last buffer mapped and
>>> callers would get the performance benefit without a new blk_dma_map()
>>> API.
>>
>> One buffer is enough for qemu-img bench, but not for more complex cases
>> (e.g. fio).
>
> I don't see any other blk_dma_map() callers.

Indeed, the fio plugin is not part of this series, but it also used
blk_dma_map. Without it, performance is awful.

Paolo
On Tue, 07/11 12:28, Paolo Bonzini wrote:
> On 11/07/2017 12:05, Stefan Hajnoczi wrote:
> > On Mon, Jul 10, 2017 at 05:08:56PM +0200, Paolo Bonzini wrote:
> >> On 10/07/2017 17:07, Stefan Hajnoczi wrote:
> >>> On Wed, Jul 05, 2017 at 09:36:32PM +0800, Fam Zheng wrote:
> >>>> Allow block driver to map and unmap a buffer for later I/O, as a performance
> >>>> hint.
> >>> The name blk_dma_map() is confusing since other "dma" APIs like
> >>> dma_addr_t and dma_blk_io() deal with guest physical addresses instead
> >>> of host addresses. They are about DMA to/from guest RAM.
> >>>
> >>> Have you considered hiding this cached mapping in block/nvme.c so that
> >>> it isn't exposed? block/nvme.c could keep the last buffer mapped and
> >>> callers would get the performance benefit without a new blk_dma_map()
> >>> API.
> >>
> >> One buffer is enough for qemu-img bench, but not for more complex cases
> >> (e.g. fio).
> >
> > I don't see any other blk_dma_map() callers.
>
> Indeed, the fio plugin is not part of this series, but it also used
> blk_dma_map. Without it, performance is awful.

How many buffers does fio use, typically? If it's not too many, block/nvme.c can
cache the last N buffers. I'm with Stefan that hiding the mapping logic from
block layer callers makes a nicer API, especially such that qemu-img is much
easier to maintain good performance across subcommands.

Fam
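
For reference, the caching alternative would look roughly like the
sketch below. Everything here is hypothetical: vfio_map()/vfio_unmap()
stand in for the driver's real IOVA mapping calls, and the cache size
and round-robin eviction are placeholders.

#include <stddef.h>
#include <stdint.h>

extern uint64_t vfio_map(void *host, size_t size);   /* stand-in */
extern void vfio_unmap(void *host);                  /* stand-in */

#define NVME_MAP_CACHE_SIZE 8   /* "N": sized to the expected queue depth */

typedef struct {
    void *host;
    size_t size;
    uint64_t iova;
} MapCacheEntry;

static MapCacheEntry map_cache[NVME_MAP_CACHE_SIZE];
static unsigned map_cache_next;  /* round-robin eviction cursor */

/* Return a cached IOVA for the buffer, mapping it on a cache miss. */
static uint64_t nvme_map_cached(void *host, size_t size)
{
    for (unsigned i = 0; i < NVME_MAP_CACHE_SIZE; i++) {
        if (map_cache[i].host == host && map_cache[i].size >= size) {
            return map_cache[i].iova;            /* hit: reuse the mapping */
        }
    }
    /* Miss: evict the oldest entry, then map the new buffer. */
    MapCacheEntry *e = &map_cache[map_cache_next];
    map_cache_next = (map_cache_next + 1) % NVME_MAP_CACHE_SIZE;
    if (e->host) {
        vfio_unmap(e->host);
    }
    e->host = host;
    e->size = size;
    e->iova = vfio_map(host, size);
    return e->iova;
}

The limitation is visible in the sketch: once the working set of buffers
exceeds N, every request degenerates into a map/unmap pair again.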
On 12/07/2017 03:07, Fam Zheng wrote:
> On Tue, 07/11 12:28, Paolo Bonzini wrote:
>> On 11/07/2017 12:05, Stefan Hajnoczi wrote:
>>> On Mon, Jul 10, 2017 at 05:08:56PM +0200, Paolo Bonzini wrote:
>>>> On 10/07/2017 17:07, Stefan Hajnoczi wrote:
>>>>> On Wed, Jul 05, 2017 at 09:36:32PM +0800, Fam Zheng wrote:
>>>>>> Allow block driver to map and unmap a buffer for later I/O, as a performance
>>>>>> hint.
>>>>> The name blk_dma_map() is confusing since other "dma" APIs like
>>>>> dma_addr_t and dma_blk_io() deal with guest physical addresses instead
>>>>> of host addresses. They are about DMA to/from guest RAM.
>>>>>
>>>>> Have you considered hiding this cached mapping in block/nvme.c so that
>>>>> it isn't exposed? block/nvme.c could keep the last buffer mapped and
>>>>> callers would get the performance benefit without a new blk_dma_map()
>>>>> API.
>>>>
>>>> One buffer is enough for qemu-img bench, but not for more complex cases
>>>> (e.g. fio).
>>>
>>> I don't see any other blk_dma_map() callers.
>>
>> Indeed, the fio plugin is not part of this series, but it also used
>> blk_dma_map. Without it, performance is awful.
>
> How many buffers does fio use, typically? If it's not too many, block/nvme.c can
> cache the last N buffers. I'm with Stefan that hiding the mapping logic from
> block layer callers makes a nicer API, especially such that qemu-img is much
> easier to maintain good performance across subcommands.

It depends on the queue depth.

I think the API addition is necessary, otherwise we wouldn't have added
the RAMBlockNotifier which is a layering violation that does the same
thing (create permanent HVA->IOVA mappings). In fact, the
RAMBlockNotifier could be moved out of nvme.c and made to use
blk_dma_map/unmap, though I'm not proposing to do it now.

I don't think qemu-img convert and dd are impacted by IOMMU map/unmap as
heavily as bench, because they operate with queue depth 1. But adding
map/unmap there would not be hard.

Paolo
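
To make the layering point concrete, a generic notifier built on the new
API could look roughly like this. The RAMBlockNotifier callbacks and
ram_block_notifier_add() follow the existing API in
include/exec/ramlist.h; the wrapper struct and setup function are
hypothetical.

typedef struct {
    RAMBlockNotifier n;
    BlockBackend *blk;
} BlkRAMNotifier;

static void blk_ram_block_added(RAMBlockNotifier *n, void *host,
                                size_t size)
{
    BlkRAMNotifier *bn = container_of(n, BlkRAMNotifier, n);
    blk_dma_map(bn->blk, host, size);   /* map new guest RAM eagerly */
}

static void blk_ram_block_removed(RAMBlockNotifier *n, void *host,
                                  size_t size)
{
    BlkRAMNotifier *bn = container_of(n, BlkRAMNotifier, n);
    blk_dma_unmap(bn->blk, host);       /* drop the persistent mapping */
}

static void blk_install_ram_notifier(BlkRAMNotifier *bn, BlockBackend *blk)
{
    bn->blk = blk;
    bn->n.ram_block_added = blk_ram_block_added;
    bn->n.ram_block_removed = blk_ram_block_removed;
    ram_block_notifier_add(&bn->n);
}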
On Wed, Jul 12, 2017 at 04:03:57PM +0200, Paolo Bonzini wrote:
> On 12/07/2017 03:07, Fam Zheng wrote:
> > On Tue, 07/11 12:28, Paolo Bonzini wrote:
> >> On 11/07/2017 12:05, Stefan Hajnoczi wrote:
> >>> On Mon, Jul 10, 2017 at 05:08:56PM +0200, Paolo Bonzini wrote:
> >>>> On 10/07/2017 17:07, Stefan Hajnoczi wrote:
> >>>>> On Wed, Jul 05, 2017 at 09:36:32PM +0800, Fam Zheng wrote:
> >>>>>> Allow block driver to map and unmap a buffer for later I/O, as a performance
> >>>>>> hint.
> >>>>> The name blk_dma_map() is confusing since other "dma" APIs like
> >>>>> dma_addr_t and dma_blk_io() deal with guest physical addresses instead
> >>>>> of host addresses. They are about DMA to/from guest RAM.
> >>>>>
> >>>>> Have you considered hiding this cached mapping in block/nvme.c so that
> >>>>> it isn't exposed? block/nvme.c could keep the last buffer mapped and
> >>>>> callers would get the performance benefit without a new blk_dma_map()
> >>>>> API.
> >>>>
> >>>> One buffer is enough for qemu-img bench, but not for more complex cases
> >>>> (e.g. fio).
> >>>
> >>> I don't see any other blk_dma_map() callers.
> >>
> >> Indeed, the fio plugin is not part of this series, but it also used
> >> blk_dma_map. Without it, performance is awful.
> >
> > How many buffers does fio use, typically? If it's not too many, block/nvme.c can
> > cache the last N buffers. I'm with Stefan that hiding the mapping logic from
> > block layer callers makes a nicer API, especially such that qemu-img is much
> > easier to maintain good performance across subcommands.
>
> It depends on the queue depth.
>
> I think the API addition is necessary, otherwise we wouldn't have added
> the RAMBlockNotifier which is a layering violation that does the same
> thing (create permanent HVA->IOVA mappings). In fact, the
> RAMBlockNotifier could be moved out of nvme.c and made to use
> blk_dma_map/unmap, though I'm not proposing to do it now.
>
> I don't think qemu-img convert and dd are impacted by IOMMU map/unmap as
> heavily as bench, because they operate with queue depth 1. But adding
> map/unmap there would not be hard.

I'm not against an API existing for this. I would just ask:

1. It's documented so the purpose and semantics are clear.
2. The name cannot be confused with dma-helpers.c APIs.

Maybe blk_register_buf() or blk_add_buf_hint()?
On 14/07/2017 15:37, Stefan Hajnoczi wrote:
> On Wed, Jul 12, 2017 at 04:03:57PM +0200, Paolo Bonzini wrote:
>> On 12/07/2017 03:07, Fam Zheng wrote:
>>> On Tue, 07/11 12:28, Paolo Bonzini wrote:
>>>> On 11/07/2017 12:05, Stefan Hajnoczi wrote:
>>>>> On Mon, Jul 10, 2017 at 05:08:56PM +0200, Paolo Bonzini wrote:
>>>>>> On 10/07/2017 17:07, Stefan Hajnoczi wrote:
>>>>>>> On Wed, Jul 05, 2017 at 09:36:32PM +0800, Fam Zheng wrote:
>>>>>>>> Allow block driver to map and unmap a buffer for later I/O, as a performance
>>>>>>>> hint.
>>>>>>> The name blk_dma_map() is confusing since other "dma" APIs like
>>>>>>> dma_addr_t and dma_blk_io() deal with guest physical addresses instead
>>>>>>> of host addresses. They are about DMA to/from guest RAM.
>>>>>>>
>>>>>>> Have you considered hiding this cached mapping in block/nvme.c so that
>>>>>>> it isn't exposed? block/nvme.c could keep the last buffer mapped and
>>>>>>> callers would get the performance benefit without a new blk_dma_map()
>>>>>>> API.
>>>>>>
>>>>>> One buffer is enough for qemu-img bench, but not for more complex cases
>>>>>> (e.g. fio).
>>>>>
>>>>> I don't see any other blk_dma_map() callers.
>>>>
>>>> Indeed, the fio plugin is not part of this series, but it also used
>>>> blk_dma_map. Without it, performance is awful.
>>>
>>> How many buffers does fio use, typically? If it's not too many, block/nvme.c can
>>> cache the last N buffers. I'm with Stefan that hiding the mapping logic from
>>> block layer callers makes a nicer API, especially such that qemu-img is much
>>> easier to maintain good performance across subcommands.
>>
>> It depends on the queue depth.
>>
>> I think the API addition is necessary, otherwise we wouldn't have added
>> the RAMBlockNotifier which is a layering violation that does the same
>> thing (create permanent HVA->IOVA mappings). In fact, the
>> RAMBlockNotifier could be moved out of nvme.c and made to use
>> blk_dma_map/unmap, though I'm not proposing to do it now.
>>
>> I don't think qemu-img convert and dd are impacted by IOMMU map/unmap as
>> heavily as bench, because they operate with queue depth 1. But adding
>> map/unmap there would not be hard.
>
> I'm not against an API existing for this. I would just ask:
>
> 1. It's documented so the purpose and semantics are clear.
> 2. The name cannot be confused with dma-helpers.c APIs.

Yes, I agree completely.

> Maybe blk_register_buf() or blk_add_buf_hint()?

blk_(un)register_buf, or perhaps iobuf or io_buffer, sounds good to me.

Paolo
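
Under that naming, the declarations from this patch would become
something like the following (signatures kept, only the names swapped;
sketch only):

/* "register" avoids the guest-physical-address connotation that "dma"
 * carries in dma-helpers.c. */
void blk_register_buf(BlockBackend *blk, void *host, size_t size);
void blk_unregister_buf(BlockBackend *blk, void *host);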