drivers/md/dm-crypt.c | 50 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 8 deletions(-)
It was reported that allocating pages for the write buffer in dm-crypt
causes measurable overhead [1].
This patch changes dm-crypt to allocate compound pages if they are
available. If not, we fall back to the mempool.
[1] https://listman.redhat.com/archives/dm-devel/2023-February/053284.html
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
drivers/md/dm-crypt.c | 50 ++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 42 insertions(+), 8 deletions(-)
Index: linux-2.6/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-crypt.c 2023-01-20 13:22:38.000000000 +0100
+++ linux-2.6/drivers/md/dm-crypt.c 2023-02-16 18:33:42.000000000 +0100
@@ -1657,6 +1657,9 @@ static void crypt_free_buffer_pages(stru
* In order to not degrade performance with excessive locking, we try
* non-blocking allocations without a mutex first but on failure we fallback
* to blocking allocations with a mutex.
+ *
+ * In order to reduce allocation overhead, we try to allocate compound pages in
+ * the first pass. If they are not available, we fall back to the mempool.
*/
static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
{
@@ -1664,8 +1667,9 @@ static struct bio *crypt_alloc_buffer(st
struct bio *clone;
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
- unsigned i, len, remaining_size;
+ unsigned remaining_size;
struct page *page;
+ unsigned order = MAX_ORDER - 1;
retry:
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
@@ -1678,20 +1682,37 @@ retry:
remaining_size = size;
- for (i = 0; i < nr_iovecs; i++) {
+ while (remaining_size) {
+ unsigned o;
+ unsigned remaining_order = __fls((remaining_size + PAGE_SIZE - 1) >> PAGE_SHIFT);
+ order = min(order, remaining_order);
+
+ while (order > 0) {
+ page = alloc_pages(gfp_mask
+ | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN, order);
+ if (likely(page != NULL))
+ goto have_pages;
+ order--;
+ }
+
page = mempool_alloc(&cc->page_pool, gfp_mask);
if (!page) {
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
gfp_mask |= __GFP_DIRECT_RECLAIM;
+ order = 0;
goto retry;
}
- len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
-
- bio_add_page(clone, page, len, 0);
+have_pages:
+ page->compound_order = order;
- remaining_size -= len;
+ for (o = 0; o < 1U << order; o++) {
+ unsigned len = min((unsigned)PAGE_SIZE, remaining_size);
+ bio_add_page(clone, page, len, 0);
+ remaining_size -= len;
+ page++;
+ }
}
/* Allocate space for integrity tags */
@@ -1711,10 +1732,23 @@ static void crypt_free_buffer_pages(stru
{
struct bio_vec *bv;
struct bvec_iter_all iter_all;
+ unsigned skip_entries = 0;
bio_for_each_segment_all(bv, clone, iter_all) {
- BUG_ON(!bv->bv_page);
- mempool_free(bv->bv_page, &cc->page_pool);
+ unsigned order;
+ struct page *page = bv->bv_page;
+ BUG_ON(!page);
+ if (skip_entries) {
+ skip_entries--;
+ continue;
+ }
+ order = page->compound_order;
+ if (order) {
+ __free_pages(page, order);
+ skip_entries = (1U << order) - 1;
+ } else {
+ mempool_free(page, &cc->page_pool);
+ }
}
}
On Thu, Feb 16, 2023 at 12:47:08PM -0500, Mikulas Patocka wrote:
> + while (order > 0) {
> + page = alloc_pages(gfp_mask
> + | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN, order);

... | __GFP_COMP

> page = mempool_alloc(&cc->page_pool, gfp_mask);
> if (!page) {
> crypt_free_buffer_pages(cc, clone);
> bio_put(clone);
> gfp_mask |= __GFP_DIRECT_RECLAIM;
> + order = 0;
> goto retry;
> }
>
> - len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
> -
> - bio_add_page(clone, page, len, 0);
> +have_pages:
> + page->compound_order = order;

No. You'll corrupt the next page if page is order-0, which it is if it
came from the mempool. Also we've deleted page->compound_order in -next
so you can't make this mistake. Using __GFP_COMP will set this field
for you, so you can just drop this line.

> - remaining_size -= len;
> + for (o = 0; o < 1U << order; o++) {
> + unsigned len = min((unsigned)PAGE_SIZE, remaining_size);
> + bio_add_page(clone, page, len, 0);
> + remaining_size -= len;
> + page++;

You can add multiple pages at once, whether they're compound or not. So
replace this entire loop with:

	bio_add_page(clone, page, remaining_size, 0);

> @@ -1711,10 +1732,23 @@ static void crypt_free_buffer_pages(stru
> {
> struct bio_vec *bv;
> struct bvec_iter_all iter_all;
> + unsigned skip_entries = 0;
>
> bio_for_each_segment_all(bv, clone, iter_all) {
> - BUG_ON(!bv->bv_page);
> - mempool_free(bv->bv_page, &cc->page_pool);
> + unsigned order;
> + struct page *page = bv->bv_page;
> + BUG_ON(!page);
> + if (skip_entries) {
> + skip_entries--;
> + continue;
> + }
> + order = page->compound_order;
> + if (order) {
> + __free_pages(page, order);
> + skip_entries = (1U << order) - 1;
> + } else {
> + mempool_free(page, &cc->page_pool);
> + }

You can simplify this by using the folio code.

	struct folio_iter fi;

	bio_for_each_folio_all(fi, bio) {
		if (folio_test_large(folio))
			folio_put(folio);
		else
			mempool_free(&folio->page, &cc->page_pool);
	}

(further work would actually convert this driver to use folios instead
of pages)
On Thu, 16 Feb 2023, Matthew Wilcox wrote:

> > - len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
> > -
> > - bio_add_page(clone, page, len, 0);
> > +have_pages:
> > + page->compound_order = order;
>
> No. You'll corrupt the next page if page is order-0, which it is if it
> came from the mempool. Also we've deleted page->compound_order in -next
> so you can't make this mistake. Using __GFP_COMP will set this field
> for you, so you can just drop this line.

OK

> > - remaining_size -= len;
> > + for (o = 0; o < 1U << order; o++) {
> > + unsigned len = min((unsigned)PAGE_SIZE, remaining_size);
> > + bio_add_page(clone, page, len, 0);
> > + remaining_size -= len;
> > + page++;
>
> You can add multiple pages at once, whether they're compound or not. So
> replace this entire loop with:
>
> bio_add_page(clone, page, remaining_size, 0);

This should be min((unsigned)PAGE_SIZE << order, remaining_size), because
we may allocate less than remaining_size.

> > @@ -1711,10 +1732,23 @@ static void crypt_free_buffer_pages(stru
> > {
> > struct bio_vec *bv;
> > struct bvec_iter_all iter_all;
> > + unsigned skip_entries = 0;
> >
> > bio_for_each_segment_all(bv, clone, iter_all) {
> > - BUG_ON(!bv->bv_page);
> > - mempool_free(bv->bv_page, &cc->page_pool);
> > + unsigned order;
> > + struct page *page = bv->bv_page;
> > + BUG_ON(!page);
> > + if (skip_entries) {
> > + skip_entries--;
> > + continue;
> > + }
> > + order = page->compound_order;
> > + if (order) {
> > + __free_pages(page, order);
> > + skip_entries = (1U << order) - 1;
> > + } else {
> > + mempool_free(page, &cc->page_pool);
> > + }
>
> You can simplify this by using the folio code.
>
> struct folio_iter fi;
>
> bio_for_each_folio_all(fi, bio) {
> if (folio_test_large(folio))
> folio_put(folio);
> else
> mempool_free(&folio->page, &cc->page_pool);
> }

OK. I'm sending version 2 of the patch.

> (further work would actually convert this driver to use folios instead
> of pages)

Mikulas
© 2016 - 2025 Red Hat, Inc.