[v1] folio support for sync I/O in RAID

[PATCH 06/15] md/raid1,raid10: use folio for sync path IO

Posted by linan666@huaweicloud.com 1 month, 3 weeks ago

From: Li Nan <linan122@huawei.com>

Convert all IO on the sync path to use folios. Rename page-related
identifiers to match folio.

Retain some now-unnecessary while and for loops to minimize code
changes; they are cleaned up in a subsequent patch.

Signed-off-by: Li Nan <linan122@huawei.com>
---
 drivers/md/md.c       |   2 +-
 drivers/md/raid1-10.c |  60 ++++--------
 drivers/md/raid1.c    | 155 ++++++++++++++-----------------
 drivers/md/raid10.c   | 207 +++++++++++++++++++-----------------------
 4 files changed, 179 insertions(+), 245 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0732bbcdb95d..dac03b831efa 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9409,7 +9409,7 @@ static bool sync_io_within_limit(struct mddev *mddev)
 {
 	/*
 	 * For raid456, sync IO is stripe(4k) per IO, for other levels, it's
-	 * RESYNC_PAGES(64k) per IO.
+	 * RESYNC_BLOCK_SIZE(64k) per IO.
 	 */
 	return atomic_read(&mddev->recovery_active) <
 	       (raid_is_456(mddev) ? 8 : 128) * sync_io_depth(mddev);
diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 260d7fd7ccbe..b8f2cc32606f 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -25,9 +25,9 @@
 #define MAX_PLUG_BIO 32
 
 /* for managing resync I/O pages */
-struct resync_pages {
+struct resync_folio {
 	void		*raid_bio;
-	struct page	*pages[RESYNC_PAGES];
+	struct folio	*folio;
 };
 
 struct raid1_plug_cb {
@@ -41,77 +41,55 @@ static void rbio_pool_free(void *rbio, void *data)
 	kfree(rbio);
 }
 
-static inline int resync_alloc_pages(struct resync_pages *rp,
+static inline int resync_alloc_folio(struct resync_folio *rf,
 				     gfp_t gfp_flags)
 {
-	int i;
-
-	for (i = 0; i < RESYNC_PAGES; i++) {
-		rp->pages[i] = alloc_page(gfp_flags);
-		if (!rp->pages[i])
-			goto out_free;
-	}
+	rf->folio = folio_alloc(gfp_flags, get_order(RESYNC_BLOCK_SIZE));
+	if (!rf->folio)
+		return -ENOMEM;
 
 	return 0;
-
-out_free:
-	while (--i >= 0)
-		put_page(rp->pages[i]);
-	return -ENOMEM;
 }
 
-static inline void resync_free_pages(struct resync_pages *rp)
+static inline void resync_free_folio(struct resync_folio *rf)
 {
-	int i;
-
-	for (i = 0; i < RESYNC_PAGES; i++)
-		put_page(rp->pages[i]);
+	folio_put(rf->folio);
 }
 
-static inline void resync_get_all_pages(struct resync_pages *rp)
+static inline void resync_get_all_folio(struct resync_folio *rf)
 {
-	int i;
-
-	for (i = 0; i < RESYNC_PAGES; i++)
-		get_page(rp->pages[i]);
+	folio_get(rf->folio);
 }
 
-static inline struct page *resync_fetch_page(struct resync_pages *rp,
-					     unsigned idx)
+static inline struct folio *resync_fetch_folio(struct resync_folio *rf)
 {
-	if (WARN_ON_ONCE(idx >= RESYNC_PAGES))
-		return NULL;
-	return rp->pages[idx];
+	return rf->folio;
 }
 
 /*
- * 'strct resync_pages' stores actual pages used for doing the resync
+ * 'struct resync_folio' stores the actual folio used for doing the resync
  *  IO, and it is per-bio, so make .bi_private points to it.
  */
-static inline struct resync_pages *get_resync_pages(struct bio *bio)
+static inline struct resync_folio *get_resync_folio(struct bio *bio)
 {
 	return bio->bi_private;
 }
 
 /* generally called after bio_reset() for reseting bvec */
-static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp,
+static void md_bio_reset_resync_folio(struct bio *bio, struct resync_folio *rf,
 			       int size)
 {
-	int idx = 0;
-
 	/* initialize bvec table again */
 	do {
-		struct page *page = resync_fetch_page(rp, idx);
-		int len = min_t(int, size, PAGE_SIZE);
+		struct folio *folio = resync_fetch_folio(rf);
+		int len = min_t(int, size, RESYNC_BLOCK_SIZE);
 
-		if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
+		if (WARN_ON(!bio_add_folio(bio, folio, len, 0))) {
 			bio->bi_status = BLK_STS_RESOURCE;
 			bio_endio(bio);
 			return;
 		}
-
-		size -= len;
-	} while (idx++ < RESYNC_PAGES && size > 0);
+	} while (0);
 }
 
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 43453f1a04f4..370bdecf5487 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -120,11 +120,11 @@ static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
 
 /*
  * for resync bio, r1bio pointer can be retrieved from the per-bio
- * 'struct resync_pages'.
+ * 'struct resync_folio'.
  */
 static inline struct r1bio *get_resync_r1bio(struct bio *bio)
 {
-	return get_resync_pages(bio)->raid_bio;
+	return get_resync_folio(bio)->raid_bio;
 }
 
 static void *r1bio_pool_alloc(gfp_t gfp_flags, struct r1conf *conf)
@@ -146,70 +146,69 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 	struct r1conf *conf = data;
 	struct r1bio *r1_bio;
 	struct bio *bio;
-	int need_pages;
+	int need_folio;
 	int j;
-	struct resync_pages *rps;
+	struct resync_folio *rfs;
 
 	r1_bio = r1bio_pool_alloc(gfp_flags, conf);
 	if (!r1_bio)
 		return NULL;
 
-	rps = kmalloc_array(conf->raid_disks * 2, sizeof(struct resync_pages),
+	rfs = kmalloc_array(conf->raid_disks * 2, sizeof(struct resync_folio),
 			    gfp_flags);
-	if (!rps)
+	if (!rfs)
 		goto out_free_r1bio;
 
 	/*
 	 * Allocate bios : 1 for reading, n-1 for writing
 	 */
 	for (j = conf->raid_disks * 2; j-- ; ) {
-		bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
+		bio = bio_kmalloc(1, gfp_flags);
 		if (!bio)
 			goto out_free_bio;
-		bio_init_inline(bio, NULL, RESYNC_PAGES, 0);
+		bio_init_inline(bio, NULL, 1, 0);
 		r1_bio->bios[j] = bio;
 	}
 	/*
-	 * Allocate RESYNC_PAGES data pages and attach them to
-	 * the first bio.
+	 * Allocate a data folio and attach it to the first bio.
 	 * If this is a user-requested check/repair, allocate
-	 * RESYNC_PAGES for each bio.
+	 * a folio for each bio.
 	 */
 	if (test_bit(MD_RECOVERY_REQUESTED, &conf->mddev->recovery))
-		need_pages = conf->raid_disks * 2;
+		need_folio = conf->raid_disks * 2;
 	else
-		need_pages = 1;
+		need_folio = 1;
 	for (j = 0; j < conf->raid_disks * 2; j++) {
-		struct resync_pages *rp = &rps[j];
+		struct resync_folio *rf = &rfs[j];
 
 		bio = r1_bio->bios[j];
 
-		if (j < need_pages) {
-			if (resync_alloc_pages(rp, gfp_flags))
-				goto out_free_pages;
+		if (j < need_folio) {
+			if (resync_alloc_folio(rf, gfp_flags))
+				goto out_free_folio;
 		} else {
-			memcpy(rp, &rps[0], sizeof(*rp));
-			resync_get_all_pages(rp);
+			memcpy(rf, &rfs[0], sizeof(*rf));
+			resync_get_all_folio(rf);
 		}
 
-		rp->raid_bio = r1_bio;
-		bio->bi_private = rp;
+		rf->raid_bio = r1_bio;
+		bio->bi_private = rf;
 	}
 
 	r1_bio->master_bio = NULL;
 
 	return r1_bio;
 
-out_free_pages:
+out_free_folio:
 	while (--j >= 0)
-		resync_free_pages(&rps[j]);
+		resync_free_folio(&rfs[j]);
 
 out_free_bio:
 	while (++j < conf->raid_disks * 2) {
 		bio_uninit(r1_bio->bios[j]);
 		kfree(r1_bio->bios[j]);
 	}
-	kfree(rps);
+	kfree(rfs);
 
 out_free_r1bio:
 	rbio_pool_free(r1_bio, data);
@@ -221,17 +220,17 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
 	struct r1conf *conf = data;
 	int i;
 	struct r1bio *r1bio = __r1_bio;
-	struct resync_pages *rp = NULL;
+	struct resync_folio *rf = NULL;
 
 	for (i = conf->raid_disks * 2; i--; ) {
-		rp = get_resync_pages(r1bio->bios[i]);
-		resync_free_pages(rp);
+		rf = get_resync_folio(r1bio->bios[i]);
+		resync_free_folio(rf);
 		bio_uninit(r1bio->bios[i]);
 		kfree(r1bio->bios[i]);
 	}
 
-	/* resync pages array stored in the 1st bio's .bi_private */
-	kfree(rp);
+	/* resync_folio array stored in the 1st bio's .bi_private */
+	kfree(rf);
 
 	rbio_pool_free(r1bio, data);
 }
@@ -2095,10 +2094,10 @@ static void end_sync_write(struct bio *bio)
 	put_sync_write_buf(r1_bio);
 }
 
-static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
-			   int sectors, struct page *page, blk_opf_t rw)
+static int r1_sync_folio_io(struct md_rdev *rdev, sector_t sector, int sectors,
+			    int off, struct folio *folio, blk_opf_t rw)
 {
-	if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+	if (sync_folio_io(rdev, sector, sectors << 9, off, folio, rw, false))
 		/* success */
 		return 1;
 	if (rw == REQ_OP_WRITE) {
@@ -2129,10 +2128,10 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 	struct mddev *mddev = r1_bio->mddev;
 	struct r1conf *conf = mddev->private;
 	struct bio *bio = r1_bio->bios[r1_bio->read_disk];
-	struct page **pages = get_resync_pages(bio)->pages;
+	struct folio *folio = get_resync_folio(bio)->folio;
 	sector_t sect = r1_bio->sector;
 	int sectors = r1_bio->sectors;
-	int idx = 0;
+	int off = 0;
 	struct md_rdev *rdev;
 
 	rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -2162,9 +2161,8 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 				 * active, and resync is currently active
 				 */
 				rdev = conf->mirrors[d].rdev;
-				if (sync_page_io(rdev, sect, s<<9,
-						 pages[idx],
-						 REQ_OP_READ, false)) {
+				if (sync_folio_io(rdev, sect, s<<9, off, folio,
+						  REQ_OP_READ, false)) {
 					success = 1;
 					break;
 				}
@@ -2197,7 +2195,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 			/* Try next page */
 			sectors -= s;
 			sect += s;
-			idx++;
+			off += s << 9;
 			continue;
 		}
 
@@ -2210,8 +2208,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
 				continue;
 			rdev = conf->mirrors[d].rdev;
-			if (r1_sync_page_io(rdev, sect, s,
-					    pages[idx],
+			if (r1_sync_folio_io(rdev, sect, s, off, folio,
 					    REQ_OP_WRITE) == 0) {
 				r1_bio->bios[d]->bi_end_io = NULL;
 				rdev_dec_pending(rdev, mddev);
@@ -2225,14 +2222,13 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
 				continue;
 			rdev = conf->mirrors[d].rdev;
-			if (r1_sync_page_io(rdev, sect, s,
-					    pages[idx],
+			if (r1_sync_folio_io(rdev, sect, s, off, folio,
 					    REQ_OP_READ) != 0)
 				atomic_add(s, &rdev->corrected_errors);
 		}
 		sectors -= s;
 		sect += s;
-		idx ++;
+		off += s << 9;
 	}
 	set_bit(R1BIO_Uptodate, &r1_bio->state);
 	bio->bi_status = 0;
@@ -2252,14 +2248,12 @@ static void process_checks(struct r1bio *r1_bio)
 	struct r1conf *conf = mddev->private;
 	int primary;
 	int i;
-	int vcnt;
 
 	/* Fix variable parts of all bios */
-	vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9);
 	for (i = 0; i < conf->raid_disks * 2; i++) {
 		blk_status_t status;
 		struct bio *b = r1_bio->bios[i];
-		struct resync_pages *rp = get_resync_pages(b);
+		struct resync_folio *rf = get_resync_folio(b);
 		if (b->bi_end_io != end_sync_read)
 			continue;
 		/* fixup the bio for reuse, but preserve errno */
@@ -2269,11 +2263,11 @@ static void process_checks(struct r1bio *r1_bio)
 		b->bi_iter.bi_sector = r1_bio->sector +
 			conf->mirrors[i].rdev->data_offset;
 		b->bi_end_io = end_sync_read;
-		rp->raid_bio = r1_bio;
-		b->bi_private = rp;
+		rf->raid_bio = r1_bio;
+		b->bi_private = rf;
 
 		/* initialize bvec table again */
-		md_bio_reset_resync_pages(b, rp, r1_bio->sectors << 9);
+		md_bio_reset_resync_folio(b, rf, r1_bio->sectors << 9);
 	}
 	for (primary = 0; primary < conf->raid_disks * 2; primary++)
 		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
@@ -2284,44 +2278,30 @@ static void process_checks(struct r1bio *r1_bio)
 		}
 	r1_bio->read_disk = primary;
 	for (i = 0; i < conf->raid_disks * 2; i++) {
-		int j = 0;
 		struct bio *pbio = r1_bio->bios[primary];
 		struct bio *sbio = r1_bio->bios[i];
 		blk_status_t status = sbio->bi_status;
-		struct page **ppages = get_resync_pages(pbio)->pages;
-		struct page **spages = get_resync_pages(sbio)->pages;
-		struct bio_vec *bi;
-		int page_len[RESYNC_PAGES] = { 0 };
-		struct bvec_iter_all iter_all;
+		struct folio *pfolio = get_resync_folio(pbio)->folio;
+		struct folio *sfolio = get_resync_folio(sbio)->folio;
 
 		if (sbio->bi_end_io != end_sync_read)
 			continue;
 		/* Now we can 'fixup' the error value */
 		sbio->bi_status = 0;
 
-		bio_for_each_segment_all(bi, sbio, iter_all)
-			page_len[j++] = bi->bv_len;
-
-		if (!status) {
-			for (j = vcnt; j-- ; ) {
-				if (memcmp(page_address(ppages[j]),
-					   page_address(spages[j]),
-					   page_len[j]))
-					break;
-			}
-		} else
-			j = 0;
-		if (j >= 0)
+		if (status || memcmp(folio_address(pfolio),
+				     folio_address(sfolio),
+				     r1_bio->sectors << 9)) {
 			atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
-		if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-			      && !status)) {
-			/* No need to write to this device. */
-			sbio->bi_end_io = NULL;
-			rdev_dec_pending(conf->mirrors[i].rdev, mddev);
-			continue;
+			if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
+				bio_copy_data(sbio, pbio);
+				continue;
+			}
 		}
 
-		bio_copy_data(sbio, pbio);
+		/* No need to write to this device. */
+		sbio->bi_end_io = NULL;
+		rdev_dec_pending(conf->mirrors[i].rdev, mddev);
 	}
 }
 
@@ -2446,9 +2426,8 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
 			if (rdev &&
 			    !test_bit(Faulty, &rdev->flags)) {
 				atomic_inc(&rdev->nr_pending);
-				r1_sync_page_io(rdev, sect, s,
-						folio_page(conf->tmpfolio, 0),
-						REQ_OP_WRITE);
+				r1_sync_folio_io(rdev, sect, s, 0,
+						conf->tmpfolio, REQ_OP_WRITE);
 				rdev_dec_pending(rdev, mddev);
 			}
 		}
@@ -2461,9 +2440,8 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
 			if (rdev &&
 			    !test_bit(Faulty, &rdev->flags)) {
 				atomic_inc(&rdev->nr_pending);
-				if (r1_sync_page_io(rdev, sect, s,
-						folio_page(conf->tmpfolio, 0),
-						REQ_OP_READ)) {
+				if (r1_sync_folio_io(rdev, sect, s, 0,
+						conf->tmpfolio, REQ_OP_READ)) {
 					atomic_add(s, &rdev->corrected_errors);
 					pr_info("md/raid1:%s: read error corrected (%d sectors at %llu on %pg)\n",
 						mdname(mddev), s,
@@ -2799,7 +2777,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 	int good_sectors = RESYNC_SECTORS;
 	int min_bad = 0; /* number of sectors that are bad in all devices */
 	int idx = sector_to_idx(sector_nr);
-	int page_idx = 0;
 
 	if (!mempool_initialized(&conf->r1buf_pool))
 		if (init_resync(conf))
@@ -3003,8 +2980,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 	nr_sectors = 0;
 	sync_blocks = 0;
 	do {
-		struct page *page;
-		int len = PAGE_SIZE;
+		struct folio *folio;
+		int len = RESYNC_BLOCK_SIZE;
 		if (sector_nr + (len>>9) > max_sector)
 			len = (max_sector - sector_nr) << 9;
 		if (len == 0)
@@ -3020,24 +2997,24 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 		}
 
 		for (i = 0 ; i < conf->raid_disks * 2; i++) {
-			struct resync_pages *rp;
+			struct resync_folio *rf;
 
 			bio = r1_bio->bios[i];
-			rp = get_resync_pages(bio);
+			rf = get_resync_folio(bio);
 			if (bio->bi_end_io) {
-				page = resync_fetch_page(rp, page_idx);
+				folio = resync_fetch_folio(rf);
 
 				/*
 				 * won't fail because the vec table is big
 				 * enough to hold all these pages
 				 */
-				__bio_add_page(bio, page, len, 0);
+				bio_add_folio_nofail(bio, folio, len, 0);
 			}
 		}
 		nr_sectors += len>>9;
 		sector_nr += len>>9;
 		sync_blocks -= (len>>9);
-	} while (++page_idx < RESYNC_PAGES);
+	} while (0);
 
 	r1_bio->sectors = nr_sectors;
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 09238dc9cde6..c93706806358 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -96,11 +96,11 @@ static void end_reshape(struct r10conf *conf);
 
 /*
  * for resync bio, r10bio pointer can be retrieved from the per-bio
- * 'struct resync_pages'.
+ * 'struct resync_folio'.
  */
 static inline struct r10bio *get_resync_r10bio(struct bio *bio)
 {
-	return get_resync_pages(bio)->raid_bio;
+	return get_resync_folio(bio)->raid_bio;
 }
 
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -133,8 +133,8 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 	struct r10bio *r10_bio;
 	struct bio *bio;
 	int j;
-	int nalloc, nalloc_rp;
-	struct resync_pages *rps;
+	int nalloc, nalloc_rf;
+	struct resync_folio *rfs;
 
 	r10_bio = r10bio_pool_alloc(gfp_flags, conf);
 	if (!r10_bio)
@@ -148,58 +148,57 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 
 	/* allocate once for all bios */
 	if (!conf->have_replacement)
-		nalloc_rp = nalloc;
+		nalloc_rf = nalloc;
 	else
-		nalloc_rp = nalloc * 2;
-	rps = kmalloc_array(nalloc_rp, sizeof(struct resync_pages), gfp_flags);
-	if (!rps)
+		nalloc_rf = nalloc * 2;
+	rfs = kmalloc_array(nalloc_rf, sizeof(struct resync_folio), gfp_flags);
+	if (!rfs)
 		goto out_free_r10bio;
 
 	/*
 	 * Allocate bios.
 	 */
 	for (j = nalloc ; j-- ; ) {
-		bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
+		bio = bio_kmalloc(1, gfp_flags);
 		if (!bio)
 			goto out_free_bio;
-		bio_init_inline(bio, NULL, RESYNC_PAGES, 0);
+		bio_init_inline(bio, NULL, 1, 0);
 		r10_bio->devs[j].bio = bio;
 		if (!conf->have_replacement)
 			continue;
-		bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
+		bio = bio_kmalloc(1, gfp_flags);
 		if (!bio)
 			goto out_free_bio;
-		bio_init_inline(bio, NULL, RESYNC_PAGES, 0);
+		bio_init_inline(bio, NULL, 1, 0);
 		r10_bio->devs[j].repl_bio = bio;
 	}
 	/*
-	 * Allocate RESYNC_PAGES data pages and attach them
-	 * where needed.
+	 * Allocate data folios and attach them where needed.
 	 */
 	for (j = 0; j < nalloc; j++) {
 		struct bio *rbio = r10_bio->devs[j].repl_bio;
-		struct resync_pages *rp, *rp_repl;
+		struct resync_folio *rf, *rf_repl;
 
-		rp = &rps[j];
+		rf = &rfs[j];
 		if (rbio)
-			rp_repl = &rps[nalloc + j];
+			rf_repl = &rfs[nalloc + j];
 
 		bio = r10_bio->devs[j].bio;
 
 		if (!j || test_bit(MD_RECOVERY_SYNC,
 				   &conf->mddev->recovery)) {
-			if (resync_alloc_pages(rp, gfp_flags))
+			if (resync_alloc_folio(rf, gfp_flags))
 				goto out_free_pages;
 		} else {
-			memcpy(rp, &rps[0], sizeof(*rp));
-			resync_get_all_pages(rp);
+			memcpy(rf, &rfs[0], sizeof(*rf));
+			resync_get_all_folio(rf);
 		}
 
-		rp->raid_bio = r10_bio;
-		bio->bi_private = rp;
+		rf->raid_bio = r10_bio;
+		bio->bi_private = rf;
 		if (rbio) {
-			memcpy(rp_repl, rp, sizeof(*rp));
-			rbio->bi_private = rp_repl;
+			memcpy(rf_repl, rf, sizeof(*rf));
+			rbio->bi_private = rf_repl;
 		}
 	}
 
@@ -207,7 +206,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 
 out_free_pages:
 	while (--j >= 0)
-		resync_free_pages(&rps[j]);
+		resync_free_folio(&rfs[j]);
 
 	j = 0;
 out_free_bio:
@@ -219,7 +218,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 			bio_uninit(r10_bio->devs[j].repl_bio);
 		kfree(r10_bio->devs[j].repl_bio);
 	}
-	kfree(rps);
+	kfree(rfs);
 out_free_r10bio:
 	rbio_pool_free(r10_bio, conf);
 	return NULL;
@@ -230,14 +229,14 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
 	struct r10conf *conf = data;
 	struct r10bio *r10bio = __r10_bio;
 	int j;
-	struct resync_pages *rp = NULL;
+	struct resync_folio *rf = NULL;
 
 	for (j = conf->copies; j--; ) {
 		struct bio *bio = r10bio->devs[j].bio;
 
 		if (bio) {
-			rp = get_resync_pages(bio);
-			resync_free_pages(rp);
+			rf = get_resync_folio(bio);
+			resync_free_folio(rf);
 			bio_uninit(bio);
 			kfree(bio);
 		}
@@ -250,7 +249,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
 	}
 
 	/* resync pages array stored in the 1st bio's .bi_private */
-	kfree(rp);
+	kfree(rf);
 
 	rbio_pool_free(r10bio, conf);
 }
@@ -2342,8 +2341,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 	struct r10conf *conf = mddev->private;
 	int i, first;
 	struct bio *tbio, *fbio;
-	int vcnt;
-	struct page **tpages, **fpages;
+	struct folio *tfolio, *ffolio;
 
 	atomic_set(&r10_bio->remaining, 1);
 
@@ -2359,14 +2357,13 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 	fbio = r10_bio->devs[i].bio;
 	fbio->bi_iter.bi_size = r10_bio->sectors << 9;
 	fbio->bi_iter.bi_idx = 0;
-	fpages = get_resync_pages(fbio)->pages;
+	ffolio = get_resync_folio(fbio)->folio;
 
-	vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9);
 	/* now find blocks with errors */
 	for (i=0 ; i < conf->copies ; i++) {
-		int  j, d;
+		int  d;
 		struct md_rdev *rdev;
-		struct resync_pages *rp;
+		struct resync_folio *rf;
 
 		tbio = r10_bio->devs[i].bio;
 
@@ -2375,31 +2372,23 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		if (i == first)
 			continue;
 
-		tpages = get_resync_pages(tbio)->pages;
+		tfolio = get_resync_folio(tbio)->folio;
 		d = r10_bio->devs[i].devnum;
 		rdev = conf->mirrors[d].rdev;
 		if (!r10_bio->devs[i].bio->bi_status) {
 			/* We know that the bi_io_vec layout is the same for
 			 * both 'first' and 'i', so we just compare them.
-			 * All vec entries are PAGE_SIZE;
 			 */
-			int sectors = r10_bio->sectors;
-			for (j = 0; j < vcnt; j++) {
-				int len = PAGE_SIZE;
-				if (sectors < (len / 512))
-					len = sectors * 512;
-				if (memcmp(page_address(fpages[j]),
-					   page_address(tpages[j]),
-					   len))
-					break;
-				sectors -= len/512;
+			if (memcmp(folio_address(ffolio),
+				   folio_address(tfolio),
+				   r10_bio->sectors << 9)) {
+				atomic64_add(r10_bio->sectors,
+					     &mddev->resync_mismatches);
+				if (test_bit(MD_RECOVERY_CHECK,
+					     &mddev->recovery))
+					/* Don't fix anything. */
+					continue;
 			}
-			if (j == vcnt)
-				continue;
-			atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
-			if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
-				/* Don't fix anything. */
-				continue;
 		} else if (test_bit(FailFast, &rdev->flags)) {
 			/* Just give up on this device */
 			md_error(rdev->mddev, rdev);
@@ -2410,13 +2399,13 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		 * First we need to fixup bv_offset, bv_len and
 		 * bi_vecs, as the read request might have corrupted these
 		 */
-		rp = get_resync_pages(tbio);
+		rf = get_resync_folio(tbio);
 		bio_reset(tbio, conf->mirrors[d].rdev->bdev, REQ_OP_WRITE);
 
-		md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size);
+		md_bio_reset_resync_folio(tbio, rf, fbio->bi_iter.bi_size);
 
-		rp->raid_bio = r10_bio;
-		tbio->bi_private = rp;
+		rf->raid_bio = r10_bio;
+		tbio->bi_private = rf;
 		tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
 		tbio->bi_end_io = end_sync_write;
 
@@ -2476,10 +2465,9 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
 	struct bio *bio = r10_bio->devs[0].bio;
 	sector_t sect = 0;
 	int sectors = r10_bio->sectors;
-	int idx = 0;
 	int dr = r10_bio->devs[0].devnum;
 	int dw = r10_bio->devs[1].devnum;
-	struct page **pages = get_resync_pages(bio)->pages;
+	struct folio *folio = get_resync_folio(bio)->folio;
 
 	while (sectors) {
 		int s = sectors;
@@ -2492,19 +2480,21 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
 
 		rdev = conf->mirrors[dr].rdev;
 		addr = r10_bio->devs[0].addr + sect;
-		ok = sync_page_io(rdev,
-				  addr,
-				  s << 9,
-				  pages[idx],
-				  REQ_OP_READ, false);
+		ok = sync_folio_io(rdev,
+				   addr,
+				   s << 9,
+				   sect << 9,
+				   folio,
+				   REQ_OP_READ, false);
 		if (ok) {
 			rdev = conf->mirrors[dw].rdev;
 			addr = r10_bio->devs[1].addr + sect;
-			ok = sync_page_io(rdev,
-					  addr,
-					  s << 9,
-					  pages[idx],
-					  REQ_OP_WRITE, false);
+			ok = sync_folio_io(rdev,
+					   addr,
+					   s << 9,
+					   sect << 9,
+					   folio,
+					   REQ_OP_WRITE, false);
 			if (!ok) {
 				set_bit(WriteErrorSeen, &rdev->flags);
 				if (!test_and_set_bit(WantReplacement,
@@ -2539,7 +2529,6 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
 
 		sectors -= s;
 		sect += s;
-		idx++;
 	}
 }
 
@@ -3174,7 +3163,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 	int max_sync = RESYNC_SECTORS;
 	sector_t sync_blocks;
 	sector_t chunk_mask = conf->geo.chunk_mask;
-	int page_idx = 0;
 
 	/*
 	 * Allow skipping a full rebuild for incremental assembly
@@ -3277,7 +3265,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 	 * with 2 bios in each, that correspond to the bios in the main one.
 	 * In this case, the subordinate r10bios link back through a
 	 * borrowed master_bio pointer, and the counter in the master
-	 * includes a ref from each subordinate.
+	 * includes a ref from each subordinate.
 	 */
 	/* First, we decide what to do and set ->bi_end_io
 	 * To end_sync_read if we want to read, and
@@ -3642,25 +3630,26 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 	if (sector_nr + max_sync < max_sector)
 		max_sector = sector_nr + max_sync;
 	do {
-		struct page *page;
-		int len = PAGE_SIZE;
+		int len = RESYNC_BLOCK_SIZE;
+
 		if (sector_nr + (len>>9) > max_sector)
 			len = (max_sector - sector_nr) << 9;
 		if (len == 0)
 			break;
 		for (bio= biolist ; bio ; bio=bio->bi_next) {
-			struct resync_pages *rp = get_resync_pages(bio);
-			page = resync_fetch_page(rp, page_idx);
-			if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
+			struct resync_folio *rf = get_resync_folio(bio);
+			struct folio *folio = resync_fetch_folio(rf);
+
+			if (WARN_ON(!bio_add_folio(bio, folio, len, 0))) {
 				bio->bi_status = BLK_STS_RESOURCE;
 				bio_endio(bio);
 				*skipped = 1;
-				return max_sync;
+				return len;
 			}
 		}
 		nr_sectors += len>>9;
 		sector_nr += len>>9;
-	} while (++page_idx < RESYNC_PAGES);
+	} while (0);
 	r10_bio->sectors = nr_sectors;
 
 	if (mddev_is_clustered(mddev) &&
@@ -4578,7 +4567,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 				int *skipped)
 {
 	/* We simply copy at most one chunk (smallest of old and new)
-	 * at a time, possibly less if that exceeds RESYNC_PAGES,
+	 * at a time, possibly less if that exceeds RESYNC_BLOCK_SIZE,
 	 * or we hit a bad block or something.
 	 * This might mean we pause for normal IO in the middle of
 	 * a chunk, but that is not a problem as mddev->reshape_position
@@ -4618,14 +4607,13 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 	struct r10bio *r10_bio;
 	sector_t next, safe, last;
 	int max_sectors;
-	int nr_sectors;
 	int s;
 	struct md_rdev *rdev;
 	int need_flush = 0;
 	struct bio *blist;
 	struct bio *bio, *read_bio;
 	int sectors_done = 0;
-	struct page **pages;
+	struct folio *folio;
 
 	if (sector_nr == 0) {
 		/* If restarting in the middle, skip the initial sectors */
@@ -4741,7 +4729,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 		return sectors_done;
 	}
 
-	read_bio = bio_alloc_bioset(rdev->bdev, RESYNC_PAGES, REQ_OP_READ,
+	read_bio = bio_alloc_bioset(rdev->bdev, 1, REQ_OP_READ,
 				    GFP_KERNEL, &mddev->bio_set);
 	read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
 			       + rdev->data_offset);
@@ -4805,32 +4793,23 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 		blist = b;
 	}
 
-	/* Now add as many pages as possible to all of these bios. */
+	/* Now add the folio to all of these bios. */
 
-	nr_sectors = 0;
-	pages = get_resync_pages(r10_bio->devs[0].bio)->pages;
-	for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) {
-		struct page *page = pages[s / (PAGE_SIZE >> 9)];
-		int len = (max_sectors - s) << 9;
-		if (len > PAGE_SIZE)
-			len = PAGE_SIZE;
-		for (bio = blist; bio ; bio = bio->bi_next) {
-			if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
-				bio->bi_status = BLK_STS_RESOURCE;
-				bio_endio(bio);
-				return sectors_done;
-			}
+	folio = get_resync_folio(r10_bio->devs[0].bio)->folio;
+	for (bio = blist; bio ; bio = bio->bi_next) {
+		if (WARN_ON(!bio_add_folio(bio, folio, max_sectors, 0))) {
+			bio->bi_status = BLK_STS_RESOURCE;
+			bio_endio(bio);
+			return sectors_done;
 		}
-		sector_nr += len >> 9;
-		nr_sectors += len >> 9;
 	}
-	r10_bio->sectors = nr_sectors;
+	r10_bio->sectors = max_sectors >> 9;
 
 	/* Now submit the read */
 	atomic_inc(&r10_bio->remaining);
 	read_bio->bi_next = NULL;
 	submit_bio_noacct(read_bio);
-	sectors_done += nr_sectors;
+	sectors_done += max_sectors;
 	if (sector_nr <= last)
 		goto read_more;
 
@@ -4932,8 +4911,8 @@ static int handle_reshape_read_error(struct mddev *mddev,
 	struct r10conf *conf = mddev->private;
 	struct r10bio *r10b;
 	int slot = 0;
-	int idx = 0;
-	struct page **pages;
+	int sect = 0;
+	struct folio *folio;
 
 	r10b = kmalloc(struct_size(r10b, devs, conf->copies), GFP_NOIO);
 	if (!r10b) {
@@ -4941,8 +4920,8 @@ static int handle_reshape_read_error(struct mddev *mddev,
 		return -ENOMEM;
 	}
 
-	/* reshape IOs share pages from .devs[0].bio */
-	pages = get_resync_pages(r10_bio->devs[0].bio)->pages;
+	/* reshape IOs share the folio from .devs[0].bio */
+	folio = get_resync_folio(r10_bio->devs[0].bio)->folio;
 
 	r10b->sector = r10_bio->sector;
 	__raid10_find_phys(&conf->prev, r10b);
@@ -4958,19 +4937,19 @@ static int handle_reshape_read_error(struct mddev *mddev,
 		while (!success) {
 			int d = r10b->devs[slot].devnum;
 			struct md_rdev *rdev = conf->mirrors[d].rdev;
-			sector_t addr;
 			if (rdev == NULL ||
 			    test_bit(Faulty, &rdev->flags) ||
 			    !test_bit(In_sync, &rdev->flags))
 				goto failed;
 
-			addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
 			atomic_inc(&rdev->nr_pending);
-			success = sync_page_io(rdev,
-					       addr,
-					       s << 9,
-					       pages[idx],
-					       REQ_OP_READ, false);
+			success = sync_folio_io(rdev,
+						r10b->devs[slot].addr +
+						sect,
+						s << 9,
+						sect << 9,
+						folio,
+						REQ_OP_READ, false);
 			rdev_dec_pending(rdev, mddev);
 			if (success)
 				break;
@@ -4989,7 +4968,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
 			return -EIO;
 		}
 		sectors -= s;
-		idx++;
+		sect += s;
 	}
 	kfree(r10b);
 	return 0;
-- 
2.39.2

Re: [PATCH 06/15] md/raid1,raid10: use folio for sync path IO

Posted by Xiao Ni 2 weeks, 6 days ago

On Wed, Dec 17, 2025 at 8:11 PM <linan666@huaweicloud.com> wrote:
>
> From: Li Nan <linan122@huawei.com>
>
> Convert all IO on the sync path to use folios. Rename page-related
> identifiers to match folio.
>
> Retain some now-unnecessary while and for loops to minimize code
> changes, clean them up in a subsequent patch.
>
> Signed-off-by: Li Nan <linan122@huawei.com>
> ---
>  drivers/md/md.c       |   2 +-
>  drivers/md/raid1-10.c |  60 ++++--------
>  drivers/md/raid1.c    | 155 ++++++++++++++-----------------
>  drivers/md/raid10.c   | 207 +++++++++++++++++++-----------------------
>  4 files changed, 179 insertions(+), 245 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 0732bbcdb95d..dac03b831efa 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -9409,7 +9409,7 @@ static bool sync_io_within_limit(struct mddev *mddev)
>  {
>         /*
>          * For raid456, sync IO is stripe(4k) per IO, for other levels, it's
> -        * RESYNC_PAGES(64k) per IO.
> +        * RESYNC_BLOCK_SIZE(64k) per IO.
>          */
>         return atomic_read(&mddev->recovery_active) <
>                (raid_is_456(mddev) ? 8 : 128) * sync_io_depth(mddev);
> diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
> index 260d7fd7ccbe..b8f2cc32606f 100644
> --- a/drivers/md/raid1-10.c
> +++ b/drivers/md/raid1-10.c
> @@ -25,9 +25,9 @@
>  #define MAX_PLUG_BIO 32
>
>  /* for managing resync I/O pages */
> -struct resync_pages {
> +struct resync_folio {
>         void            *raid_bio;
> -       struct page     *pages[RESYNC_PAGES];
> +       struct folio    *folio;
>  };
>
>  struct raid1_plug_cb {
> @@ -41,77 +41,55 @@ static void rbio_pool_free(void *rbio, void *data)
>         kfree(rbio);
>  }
>
> -static inline int resync_alloc_pages(struct resync_pages *rp,
> +static inline int resync_alloc_folio(struct resync_folio *rf,
>                                      gfp_t gfp_flags)
>  {
> -       int i;
> -
> -       for (i = 0; i < RESYNC_PAGES; i++) {
> -               rp->pages[i] = alloc_page(gfp_flags);
> -               if (!rp->pages[i])
> -                       goto out_free;
> -       }
> +       rf->folio = folio_alloc(gfp_flags, get_order(RESYNC_BLOCK_SIZE));
> +       if (!rf->folio)
> +               return -ENOMEM;

Is it OK to add an error log here? Compared with the multi-page
situation, the possibility of failure is somewhat higher because
it needs to allocate a contiguous block of physical memory.

>
>         return 0;
> -
> -out_free:
> -       while (--i >= 0)
> -               put_page(rp->pages[i]);
> -       return -ENOMEM;
>  }
>
> -static inline void resync_free_pages(struct resync_pages *rp)
> +static inline void resync_free_folio(struct resync_folio *rf)
>  {
> -       int i;
> -
> -       for (i = 0; i < RESYNC_PAGES; i++)
> -               put_page(rp->pages[i]);
> +       folio_put(rf->folio);
>  }
>
> -static inline void resync_get_all_pages(struct resync_pages *rp)
> +static inline void resync_get_all_folio(struct resync_folio *rf)
>  {
> -       int i;
> -
> -       for (i = 0; i < RESYNC_PAGES; i++)
> -               get_page(rp->pages[i]);
> +       folio_get(rf->folio);
>  }
>
> -static inline struct page *resync_fetch_page(struct resync_pages *rp,
> -                                            unsigned idx)
> +static inline struct folio *resync_fetch_folio(struct resync_folio *rf)
>  {
> -       if (WARN_ON_ONCE(idx >= RESYNC_PAGES))
> -               return NULL;
> -       return rp->pages[idx];
> +       return rf->folio;
>  }
>
>  /*
> - * 'strct resync_pages' stores actual pages used for doing the resync
> + * 'strct resync_folio' stores actual pages used for doing the resync
>   *  IO, and it is per-bio, so make .bi_private points to it.
>   */
> -static inline struct resync_pages *get_resync_pages(struct bio *bio)
> +static inline struct resync_folio *get_resync_folio(struct bio *bio)
>  {
>         return bio->bi_private;
>  }
>
>  /* generally called after bio_reset() for reseting bvec */
> -static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp,
> +static void md_bio_reset_resync_folio(struct bio *bio, struct resync_folio *rf,
>                                int size)
>  {
> -       int idx = 0;
> -
>         /* initialize bvec table again */
>         do {
> -               struct page *page = resync_fetch_page(rp, idx);
> -               int len = min_t(int, size, PAGE_SIZE);
> +               struct folio *folio = resync_fetch_folio(rf);
> +               int len = min_t(int, size, RESYNC_BLOCK_SIZE);
>
> -               if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
> +               if (WARN_ON(!bio_add_folio(bio, folio, len, 0))) {

Is it OK to use bio_add_folio(bio, folio, RESYNC_BLOCK_SIZE, 0)
directly here? The patch removes `size -= len` below, so isn't it now
useless to compare size and RESYNC_BLOCK_SIZE above?

>                         bio->bi_status = BLK_STS_RESOURCE;
>                         bio_endio(bio);
>                         return;
>                 }
> -
> -               size -= len;
> -       } while (idx++ < RESYNC_PAGES && size > 0);
> +       } while (0);
>  }
>
>
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 43453f1a04f4..370bdecf5487 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -120,11 +120,11 @@ static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
>
>  /*
>   * for resync bio, r1bio pointer can be retrieved from the per-bio
> - * 'struct resync_pages'.
> + * 'struct resync_folio'.
>   */
>  static inline struct r1bio *get_resync_r1bio(struct bio *bio)
>  {
> -       return get_resync_pages(bio)->raid_bio;
> +       return get_resync_folio(bio)->raid_bio;
>  }
>
>  static void *r1bio_pool_alloc(gfp_t gfp_flags, struct r1conf *conf)
> @@ -146,70 +146,69 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
>         struct r1conf *conf = data;
>         struct r1bio *r1_bio;
>         struct bio *bio;
> -       int need_pages;
> +       int need_folio;
>         int j;
> -       struct resync_pages *rps;
> +       struct resync_folio *rfs;
>
>         r1_bio = r1bio_pool_alloc(gfp_flags, conf);
>         if (!r1_bio)
>                 return NULL;
>
> -       rps = kmalloc_array(conf->raid_disks * 2, sizeof(struct resync_pages),
> +       rfs = kmalloc_array(conf->raid_disks * 2, sizeof(struct resync_folio),
>                             gfp_flags);
> -       if (!rps)
> +       if (!rfs)
>                 goto out_free_r1bio;
>
>         /*
>          * Allocate bios : 1 for reading, n-1 for writing
>          */
>         for (j = conf->raid_disks * 2; j-- ; ) {
> -               bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
> +               bio = bio_kmalloc(1, gfp_flags);
>                 if (!bio)
>                         goto out_free_bio;
> -               bio_init_inline(bio, NULL, RESYNC_PAGES, 0);
> +               bio_init_inline(bio, NULL, 1, 0);
>                 r1_bio->bios[j] = bio;
>         }
>         /*
> -        * Allocate RESYNC_PAGES data pages and attach them to
> -        * the first bio.
> +        * Allocate data folio and attach them to the first bio.

Typo:
s/attach them/attach it/g

>          * If this is a user-requested check/repair, allocate
> -        * RESYNC_PAGES for each bio.
> +        * folio for each bio.
>          */
>         if (test_bit(MD_RECOVERY_REQUESTED, &conf->mddev->recovery))
> -               need_pages = conf->raid_disks * 2;
> +               need_folio = conf->raid_disks * 2;
>         else
> -               need_pages = 1;
> +               need_folio = 1;
>         for (j = 0; j < conf->raid_disks * 2; j++) {
> -               struct resync_pages *rp = &rps[j];
> +               struct resync_folio *rf = &rfs[j];
>
>                 bio = r1_bio->bios[j];
>
> -               if (j < need_pages) {
> -                       if (resync_alloc_pages(rp, gfp_flags))
> -                               goto out_free_pages;
> +               if (j < need_folio) {
> +                       if (resync_alloc_folio(rf, gfp_flags))
> +                               goto out_free_folio;
>                 } else {
> -                       memcpy(rp, &rps[0], sizeof(*rp));
> -                       resync_get_all_pages(rp);
> +                       memcpy(rf, &rfs[0], sizeof(*rf));
> +                       resync_get_all_folio(rf);
>                 }
>
> -               rp->raid_bio = r1_bio;
> -               bio->bi_private = rp;
> +               rf->raid_bio = r1_bio;
> +               bio->bi_private = rf;
>         }
>
>         r1_bio->master_bio = NULL;
>
>         return r1_bio;
>
> -out_free_pages:
> +out_free_folio:
>         while (--j >= 0)
> -               resync_free_pages(&rps[j]);
> +               resync_free_folio(&rfs[j]);
>
>  out_free_bio:
>         while (++j < conf->raid_disks * 2) {
>                 bio_uninit(r1_bio->bios[j]);
>                 kfree(r1_bio->bios[j]);
>         }
> -       kfree(rps);
> +       kfree(rfs);
>
>  out_free_r1bio:
>         rbio_pool_free(r1_bio, data);
> @@ -221,17 +220,17 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
>         struct r1conf *conf = data;
>         int i;
>         struct r1bio *r1bio = __r1_bio;
> -       struct resync_pages *rp = NULL;
> +       struct resync_folio *rf = NULL;
>
>         for (i = conf->raid_disks * 2; i--; ) {
> -               rp = get_resync_pages(r1bio->bios[i]);
> -               resync_free_pages(rp);
> +               rf = get_resync_folio(r1bio->bios[i]);
> +               resync_free_folio(rf);
>                 bio_uninit(r1bio->bios[i]);
>                 kfree(r1bio->bios[i]);
>         }
>
> -       /* resync pages array stored in the 1st bio's .bi_private */
> -       kfree(rp);
> +       /* resync folio stored in the 1st bio's .bi_private */
> +       kfree(rf);
>
>         rbio_pool_free(r1bio, data);
>  }
> @@ -2095,10 +2094,10 @@ static void end_sync_write(struct bio *bio)
>         put_sync_write_buf(r1_bio);
>  }
>
> -static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
> -                          int sectors, struct page *page, blk_opf_t rw)
> +static int r1_sync_folio_io(struct md_rdev *rdev, sector_t sector, int sectors,
> +                           int off, struct folio *folio, blk_opf_t rw)
>  {
> -       if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
> +       if (sync_folio_io(rdev, sector, sectors << 9, off, folio, rw, false))
>                 /* success */
>                 return 1;
>         if (rw == REQ_OP_WRITE) {
> @@ -2129,10 +2128,10 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
>         struct mddev *mddev = r1_bio->mddev;
>         struct r1conf *conf = mddev->private;
>         struct bio *bio = r1_bio->bios[r1_bio->read_disk];
> -       struct page **pages = get_resync_pages(bio)->pages;
> +       struct folio *folio = get_resync_folio(bio)->folio;
>         sector_t sect = r1_bio->sector;
>         int sectors = r1_bio->sectors;
> -       int idx = 0;
> +       int off = 0;
>         struct md_rdev *rdev;
>
>         rdev = conf->mirrors[r1_bio->read_disk].rdev;
> @@ -2162,9 +2161,8 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
>                                  * active, and resync is currently active
>                                  */
>                                 rdev = conf->mirrors[d].rdev;
> -                               if (sync_page_io(rdev, sect, s<<9,
> -                                                pages[idx],
> -                                                REQ_OP_READ, false)) {
> +                               if (sync_folio_io(rdev, sect, s<<9, off, folio,
> +                                                 REQ_OP_READ, false)) {
>                                         success = 1;
>                                         break;
>                                 }
> @@ -2197,7 +2195,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
>                         /* Try next page */
>                         sectors -= s;
>                         sect += s;
> -                       idx++;
> +                       off += s << 9;
>                         continue;
>                 }
>
> @@ -2210,8 +2208,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
>                         if (r1_bio->bios[d]->bi_end_io != end_sync_read)
>                                 continue;
>                         rdev = conf->mirrors[d].rdev;
> -                       if (r1_sync_page_io(rdev, sect, s,
> -                                           pages[idx],
> +                       if (r1_sync_folio_io(rdev, sect, s, off, folio,
>                                             REQ_OP_WRITE) == 0) {
>                                 r1_bio->bios[d]->bi_end_io = NULL;
>                                 rdev_dec_pending(rdev, mddev);
> @@ -2225,14 +2222,13 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
>                         if (r1_bio->bios[d]->bi_end_io != end_sync_read)
>                                 continue;
>                         rdev = conf->mirrors[d].rdev;
> -                       if (r1_sync_page_io(rdev, sect, s,
> -                                           pages[idx],
> +                       if (r1_sync_folio_io(rdev, sect, s, off, folio,
>                                             REQ_OP_READ) != 0)
>                                 atomic_add(s, &rdev->corrected_errors);
>                 }
>                 sectors -= s;
>                 sect += s;
> -               idx ++;
> +               off += s << 9;
>         }
>         set_bit(R1BIO_Uptodate, &r1_bio->state);
>         bio->bi_status = 0;
> @@ -2252,14 +2248,12 @@ static void process_checks(struct r1bio *r1_bio)
>         struct r1conf *conf = mddev->private;
>         int primary;
>         int i;
> -       int vcnt;
>
>         /* Fix variable parts of all bios */
> -       vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9);
>         for (i = 0; i < conf->raid_disks * 2; i++) {
>                 blk_status_t status;
>                 struct bio *b = r1_bio->bios[i];
> -               struct resync_pages *rp = get_resync_pages(b);
> +               struct resync_folio *rf = get_resync_folio(b);
>                 if (b->bi_end_io != end_sync_read)
>                         continue;
>                 /* fixup the bio for reuse, but preserve errno */
> @@ -2269,11 +2263,11 @@ static void process_checks(struct r1bio *r1_bio)
>                 b->bi_iter.bi_sector = r1_bio->sector +
>                         conf->mirrors[i].rdev->data_offset;
>                 b->bi_end_io = end_sync_read;
> -               rp->raid_bio = r1_bio;
> -               b->bi_private = rp;
> +               rf->raid_bio = r1_bio;
> +               b->bi_private = rf;
>
>                 /* initialize bvec table again */
> -               md_bio_reset_resync_pages(b, rp, r1_bio->sectors << 9);
> +               md_bio_reset_resync_folio(b, rf, r1_bio->sectors << 9);
>         }
>         for (primary = 0; primary < conf->raid_disks * 2; primary++)
>                 if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
> @@ -2284,44 +2278,30 @@ static void process_checks(struct r1bio *r1_bio)
>                 }
>         r1_bio->read_disk = primary;
>         for (i = 0; i < conf->raid_disks * 2; i++) {
> -               int j = 0;
>                 struct bio *pbio = r1_bio->bios[primary];
>                 struct bio *sbio = r1_bio->bios[i];
>                 blk_status_t status = sbio->bi_status;
> -               struct page **ppages = get_resync_pages(pbio)->pages;
> -               struct page **spages = get_resync_pages(sbio)->pages;
> -               struct bio_vec *bi;
> -               int page_len[RESYNC_PAGES] = { 0 };
> -               struct bvec_iter_all iter_all;
> +               struct folio *pfolio = get_resync_folio(pbio)->folio;
> +               struct folio *sfolio = get_resync_folio(sbio)->folio;
>
>                 if (sbio->bi_end_io != end_sync_read)
>                         continue;
>                 /* Now we can 'fixup' the error value */
>                 sbio->bi_status = 0;
>
> -               bio_for_each_segment_all(bi, sbio, iter_all)
> -                       page_len[j++] = bi->bv_len;
> -
> -               if (!status) {
> -                       for (j = vcnt; j-- ; ) {
> -                               if (memcmp(page_address(ppages[j]),
> -                                          page_address(spages[j]),
> -                                          page_len[j]))
> -                                       break;
> -                       }
> -               } else
> -                       j = 0;
> -               if (j >= 0)
> +               if (status || memcmp(folio_address(pfolio),
> +                                    folio_address(sfolio),
> +                                    r1_bio->sectors << 9)) {
>                         atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
> -               if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
> -                             && !status)) {
> -                       /* No need to write to this device. */
> -                       sbio->bi_end_io = NULL;
> -                       rdev_dec_pending(conf->mirrors[i].rdev, mddev);
> -                       continue;
> +                       if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
> +                               bio_copy_data(sbio, pbio);
> +                               continue;
> +                       }

The logic is changed here. The original logic:
1. read ok, no mismatch: no bio_copy_data
2. read ok, mismatch, check: no bio_copy_data
3. read ok, mismatch, no check: need bio_copy_data
4. read fail: need bio_copy_data

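Case 4 is broken: when MD_RECOVERY_CHECK is set, a failed read now
falls through to the "no need to write" path and skips bio_copy_data.

How about adding a temporary need_write to make the logic clearer?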

something like:
        if (!status) {
            int ret = 0;
            ret = memcmp(folio_address(pfolio),
                             folio_address(sfolio),
                             r1_bio->sectors << 9);
            if (ret) {
                atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
                if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
                    need_write = true;
            }
        } else
            need_write = true;

        if (need_write)
            bio_copy_data(sbio, pbio);
        else {
            /* No need to write to this device. */
            sbio->bi_end_io = NULL;
            rdev_dec_pending(conf->mirrors[i].rdev, mddev);
        }

>                 }
>
> -               bio_copy_data(sbio, pbio);
> +               /* No need to write to this device. */
> +               sbio->bi_end_io = NULL;
> +               rdev_dec_pending(conf->mirrors[i].rdev, mddev);
>         }
>  }
>
> @@ -2446,9 +2426,8 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
>                         if (rdev &&
>                             !test_bit(Faulty, &rdev->flags)) {
>                                 atomic_inc(&rdev->nr_pending);
> -                               r1_sync_page_io(rdev, sect, s,
> -                                               folio_page(conf->tmpfolio, 0),
> -                                               REQ_OP_WRITE);
> +                               r1_sync_folio_io(rdev, sect, s, 0,
> +                                               conf->tmpfolio, REQ_OP_WRITE);
>                                 rdev_dec_pending(rdev, mddev);
>                         }
>                 }
> @@ -2461,9 +2440,8 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
>                         if (rdev &&
>                             !test_bit(Faulty, &rdev->flags)) {
>                                 atomic_inc(&rdev->nr_pending);
> -                               if (r1_sync_page_io(rdev, sect, s,
> -                                               folio_page(conf->tmpfolio, 0),
> -                                               REQ_OP_READ)) {
> +                               if (r1_sync_folio_io(rdev, sect, s, 0,
> +                                               conf->tmpfolio, REQ_OP_READ)) {
>                                         atomic_add(s, &rdev->corrected_errors);
>                                         pr_info("md/raid1:%s: read error corrected (%d sectors at %llu on %pg)\n",
>                                                 mdname(mddev), s,
> @@ -2799,7 +2777,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
>         int good_sectors = RESYNC_SECTORS;
>         int min_bad = 0; /* number of sectors that are bad in all devices */
>         int idx = sector_to_idx(sector_nr);
> -       int page_idx = 0;
>
>         if (!mempool_initialized(&conf->r1buf_pool))
>                 if (init_resync(conf))
> @@ -3003,8 +2980,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
>         nr_sectors = 0;
>         sync_blocks = 0;
>         do {
> -               struct page *page;
> -               int len = PAGE_SIZE;
> +               struct folio *folio;
> +               int len = RESYNC_BLOCK_SIZE;
>                 if (sector_nr + (len>>9) > max_sector)
>                         len = (max_sector - sector_nr) << 9;
>                 if (len == 0)
> @@ -3020,24 +2997,24 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
>                 }
>
>                 for (i = 0 ; i < conf->raid_disks * 2; i++) {
> -                       struct resync_pages *rp;
> +                       struct resync_folio *rf;
>
>                         bio = r1_bio->bios[i];
> -                       rp = get_resync_pages(bio);
> +                       rf = get_resync_folio(bio);
>                         if (bio->bi_end_io) {
> -                               page = resync_fetch_page(rp, page_idx);
> +                               folio = resync_fetch_folio(rf);
>
>                                 /*
>                                  * won't fail because the vec table is big
>                                  * enough to hold all these pages
>                                  */

The comment above may no longer be needed, because there is only
one vec in the bio.

> -                               __bio_add_page(bio, page, len, 0);
> +                               bio_add_folio_nofail(bio, folio, len, 0);
>                         }
>                 }
>                 nr_sectors += len>>9;
>                 sector_nr += len>>9;
>                 sync_blocks -= (len>>9);

These three lines are not needed anymore.

> -       } while (++page_idx < RESYNC_PAGES);
> +       } while (0);
>
>         r1_bio->sectors = nr_sectors;
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 09238dc9cde6..c93706806358 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -96,11 +96,11 @@ static void end_reshape(struct r10conf *conf);
>
>  /*
>   * for resync bio, r10bio pointer can be retrieved from the per-bio
> - * 'struct resync_pages'.
> + * 'struct resync_folio'.
>   */
>  static inline struct r10bio *get_resync_r10bio(struct bio *bio)
>  {
> -       return get_resync_pages(bio)->raid_bio;
> +       return get_resync_folio(bio)->raid_bio;
>  }
>
>  static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
> @@ -133,8 +133,8 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
>         struct r10bio *r10_bio;
>         struct bio *bio;
>         int j;
> -       int nalloc, nalloc_rp;
> -       struct resync_pages *rps;
> +       int nalloc, nalloc_rf;
> +       struct resync_folio *rfs;
>
>         r10_bio = r10bio_pool_alloc(gfp_flags, conf);
>         if (!r10_bio)
> @@ -148,58 +148,57 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
>
>         /* allocate once for all bios */
>         if (!conf->have_replacement)
> -               nalloc_rp = nalloc;
> +               nalloc_rf = nalloc;
>         else
> -               nalloc_rp = nalloc * 2;
> -       rps = kmalloc_array(nalloc_rp, sizeof(struct resync_pages), gfp_flags);
> -       if (!rps)
> +               nalloc_rf = nalloc * 2;
> +       rfs = kmalloc_array(nalloc_rf, sizeof(struct resync_folio), gfp_flags);
> +       if (!rfs)
>                 goto out_free_r10bio;
>
>         /*
>          * Allocate bios.
>          */
>         for (j = nalloc ; j-- ; ) {
> -               bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
> +               bio = bio_kmalloc(1, gfp_flags);
>                 if (!bio)
>                         goto out_free_bio;
> -               bio_init_inline(bio, NULL, RESYNC_PAGES, 0);
> +               bio_init_inline(bio, NULL, 1, 0);
>                 r10_bio->devs[j].bio = bio;
>                 if (!conf->have_replacement)
>                         continue;
> -               bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
> +               bio = bio_kmalloc(1, gfp_flags);
>                 if (!bio)
>                         goto out_free_bio;
> -               bio_init_inline(bio, NULL, RESYNC_PAGES, 0);
> +               bio_init_inline(bio, NULL, 1, 0);
>                 r10_bio->devs[j].repl_bio = bio;
>         }
>         /*
> -        * Allocate RESYNC_PAGES data pages and attach them
> -        * where needed.
> +        * Allocate data folio and attach them where needed.

Typo:
s/attach them/attach it/g

>          */
>         for (j = 0; j < nalloc; j++) {
>                 struct bio *rbio = r10_bio->devs[j].repl_bio;
> -               struct resync_pages *rp, *rp_repl;
> +               struct resync_folio *rf, *rf_repl;
>
> -               rp = &rps[j];
> +               rf = &rfs[j];
>                 if (rbio)
> -                       rp_repl = &rps[nalloc + j];
> +                       rf_repl = &rfs[nalloc + j];
>
>                 bio = r10_bio->devs[j].bio;
>
>                 if (!j || test_bit(MD_RECOVERY_SYNC,
>                                    &conf->mddev->recovery)) {
> -                       if (resync_alloc_pages(rp, gfp_flags))
> +                       if (resync_alloc_folio(rf, gfp_flags))
>                                 goto out_free_pages;

s/out_free_pages/out_free_folio/g

>                 } else {
> -                       memcpy(rp, &rps[0], sizeof(*rp));
> -                       resync_get_all_pages(rp);
> +                       memcpy(rf, &rfs[0], sizeof(*rf));
> +                       resync_get_all_folio(rf);

Maybe the name resync_get_folio is better?

>                 }
>
> -               rp->raid_bio = r10_bio;
> -               bio->bi_private = rp;
> +               rf->raid_bio = r10_bio;
> +               bio->bi_private = rf;
>                 if (rbio) {
> -                       memcpy(rp_repl, rp, sizeof(*rp));
> -                       rbio->bi_private = rp_repl;
> +                       memcpy(rf_repl, rf, sizeof(*rf));
> +                       rbio->bi_private = rf_repl;
>                 }
>         }
>
> @@ -207,7 +206,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
>
>  out_free_pages:
>         while (--j >= 0)
> -               resync_free_pages(&rps[j]);
> +               resync_free_folio(&rfs[j]);
>
>         j = 0;
>  out_free_bio:
> @@ -219,7 +218,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
>                         bio_uninit(r10_bio->devs[j].repl_bio);
>                 kfree(r10_bio->devs[j].repl_bio);
>         }
> -       kfree(rps);
> +       kfree(rfs);
>  out_free_r10bio:
>         rbio_pool_free(r10_bio, conf);
>         return NULL;
> @@ -230,14 +229,14 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
>         struct r10conf *conf = data;
>         struct r10bio *r10bio = __r10_bio;
>         int j;
> -       struct resync_pages *rp = NULL;
> +       struct resync_folio *rf = NULL;
>
>         for (j = conf->copies; j--; ) {
>                 struct bio *bio = r10bio->devs[j].bio;
>
>                 if (bio) {
> -                       rp = get_resync_pages(bio);
> -                       resync_free_pages(rp);
> +                       rf = get_resync_folio(bio);
> +                       resync_free_folio(rf);
>                         bio_uninit(bio);
>                         kfree(bio);
>                 }
> @@ -250,7 +249,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
>         }
>
>         /* resync pages array stored in the 1st bio's .bi_private */
> -       kfree(rp);
> +       kfree(rf);
>
>         rbio_pool_free(r10bio, conf);
>  }
> @@ -2342,8 +2341,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>         struct r10conf *conf = mddev->private;
>         int i, first;
>         struct bio *tbio, *fbio;
> -       int vcnt;
> -       struct page **tpages, **fpages;
> +       struct folio *tfolio, *ffolio;
>
>         atomic_set(&r10_bio->remaining, 1);
>
> @@ -2359,14 +2357,13 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>         fbio = r10_bio->devs[i].bio;
>         fbio->bi_iter.bi_size = r10_bio->sectors << 9;
>         fbio->bi_iter.bi_idx = 0;
> -       fpages = get_resync_pages(fbio)->pages;
> +       ffolio = get_resync_folio(fbio)->folio;
>
> -       vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9);
>         /* now find blocks with errors */
>         for (i=0 ; i < conf->copies ; i++) {
> -               int  j, d;
> +               int  d;
>                 struct md_rdev *rdev;
> -               struct resync_pages *rp;
> +               struct resync_folio *rf;
>
>                 tbio = r10_bio->devs[i].bio;
>
> @@ -2375,31 +2372,23 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>                 if (i == first)
>                         continue;
>
> -               tpages = get_resync_pages(tbio)->pages;
> +               tfolio = get_resync_folio(tbio)->folio;
>                 d = r10_bio->devs[i].devnum;
>                 rdev = conf->mirrors[d].rdev;
>                 if (!r10_bio->devs[i].bio->bi_status) {
>                         /* We know that the bi_io_vec layout is the same for
>                          * both 'first' and 'i', so we just compare them.
> -                        * All vec entries are PAGE_SIZE;
>                          */
> -                       int sectors = r10_bio->sectors;
> -                       for (j = 0; j < vcnt; j++) {
> -                               int len = PAGE_SIZE;
> -                               if (sectors < (len / 512))
> -                                       len = sectors * 512;
> -                               if (memcmp(page_address(fpages[j]),
> -                                          page_address(tpages[j]),
> -                                          len))
> -                                       break;
> -                               sectors -= len/512;
> +                       if (memcmp(folio_address(ffolio),
> +                                  folio_address(tfolio),
> +                                  r10_bio->sectors << 9)) {
> +                               atomic64_add(r10_bio->sectors,
> +                                            &mddev->resync_mismatches);
> +                               if (test_bit(MD_RECOVERY_CHECK,
> +                                            &mddev->recovery))
> +                                       /* Don't fix anything. */
> +                                       continue;
>                         }
> -                       if (j == vcnt)
> -                               continue;
> -                       atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
> -                       if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
> -                               /* Don't fix anything. */
> -                               continue;
>                 } else if (test_bit(FailFast, &rdev->flags)) {
>                         /* Just give up on this device */
>                         md_error(rdev->mddev, rdev);
> @@ -2410,13 +2399,13 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
>                  * First we need to fixup bv_offset, bv_len and
>                  * bi_vecs, as the read request might have corrupted these
>                  */
> -               rp = get_resync_pages(tbio);
> +               rf = get_resync_folio(tbio);
>                 bio_reset(tbio, conf->mirrors[d].rdev->bdev, REQ_OP_WRITE);
>
> -               md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size);
> +               md_bio_reset_resync_folio(tbio, rf, fbio->bi_iter.bi_size);
>
> -               rp->raid_bio = r10_bio;
> -               tbio->bi_private = rp;
> +               rf->raid_bio = r10_bio;
> +               tbio->bi_private = rf;
>                 tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
>                 tbio->bi_end_io = end_sync_write;
>
> @@ -2476,10 +2465,9 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
>         struct bio *bio = r10_bio->devs[0].bio;
>         sector_t sect = 0;
>         int sectors = r10_bio->sectors;
> -       int idx = 0;
>         int dr = r10_bio->devs[0].devnum;
>         int dw = r10_bio->devs[1].devnum;
> -       struct page **pages = get_resync_pages(bio)->pages;
> +       struct folio *folio = get_resync_folio(bio)->folio;
>
>         while (sectors) {
>                 int s = sectors;
> @@ -2492,19 +2480,21 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
>
>                 rdev = conf->mirrors[dr].rdev;
>                 addr = r10_bio->devs[0].addr + sect;
> -               ok = sync_page_io(rdev,
> -                                 addr,
> -                                 s << 9,
> -                                 pages[idx],
> -                                 REQ_OP_READ, false);
> +               ok = sync_folio_io(rdev,
> +                                  addr,
> +                                  s << 9,
> +                                  sect << 9,
> +                                  folio,
> +                                  REQ_OP_READ, false);

According to the comment at the beginning of fix_recovery_read_error, it
needs to submit IO in page-size units, right? If so, it still needs to
use sync_page_io here.

>                 if (ok) {
>                         rdev = conf->mirrors[dw].rdev;
>                         addr = r10_bio->devs[1].addr + sect;
> -                       ok = sync_page_io(rdev,
> -                                         addr,
> -                                         s << 9,
> -                                         pages[idx],
> -                                         REQ_OP_WRITE, false);
> +                       ok = sync_folio_io(rdev,
> +                                          addr,
> +                                          s << 9,
> +                                          sect << 9,
> +                                          folio,
> +                                          REQ_OP_WRITE, false);
>                         if (!ok) {
>                                 set_bit(WriteErrorSeen, &rdev->flags);
>                                 if (!test_and_set_bit(WantReplacement,
> @@ -2539,7 +2529,6 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
>
>                 sectors -= s;
>                 sect += s;
> -               idx++;
>         }
>  }
>
> @@ -3174,7 +3163,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>         int max_sync = RESYNC_SECTORS;
>         sector_t sync_blocks;
>         sector_t chunk_mask = conf->geo.chunk_mask;
> -       int page_idx = 0;
>
>         /*
>          * Allow skipping a full rebuild for incremental assembly
> @@ -3277,7 +3265,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>          * with 2 bios in each, that correspond to the bios in the main one.
>          * In this case, the subordinate r10bios link back through a
>          * borrowed master_bio pointer, and the counter in the master
> -        * includes a ref from each subordinate.
> +        * bio_add_folio includes a ref from each subordinate.

What's the reason for changing this? I don't understand the new version.

Best Regards
Xiao
>          */
>         /* First, we decide what to do and set ->bi_end_io
>          * To end_sync_read if we want to read, and
> @@ -3642,25 +3630,26 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>         if (sector_nr + max_sync < max_sector)
>                 max_sector = sector_nr + max_sync;
>         do {
> -               struct page *page;
> -               int len = PAGE_SIZE;
> +               int len = RESYNC_BLOCK_SIZE;
> +
>                 if (sector_nr + (len>>9) > max_sector)
>                         len = (max_sector - sector_nr) << 9;
>                 if (len == 0)
>                         break;
>                 for (bio= biolist ; bio ; bio=bio->bi_next) {
> -                       struct resync_pages *rp = get_resync_pages(bio);
> -                       page = resync_fetch_page(rp, page_idx);
> -                       if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
> +                       struct resync_folio *rf = get_resync_folio(bio);
> +                       struct folio *folio = resync_fetch_folio(rf);
> +
> +                       if (WARN_ON(!bio_add_folio(bio, folio, len, 0))) {
>                                 bio->bi_status = BLK_STS_RESOURCE;
>                                 bio_endio(bio);
>                                 *skipped = 1;
> -                               return max_sync;
> +                               return len;
>                         }
>                 }
>                 nr_sectors += len>>9;
>                 sector_nr += len>>9;
> -       } while (++page_idx < RESYNC_PAGES);
> +       } while (0);
>         r10_bio->sectors = nr_sectors;
>
>         if (mddev_is_clustered(mddev) &&
> @@ -4578,7 +4567,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
>                                 int *skipped)
>  {
>         /* We simply copy at most one chunk (smallest of old and new)
> -        * at a time, possibly less if that exceeds RESYNC_PAGES,
> +        * at a time, possibly less if that exceeds RESYNC_BLOCK_SIZE,
>          * or we hit a bad block or something.
>          * This might mean we pause for normal IO in the middle of
>          * a chunk, but that is not a problem as mddev->reshape_position
> @@ -4618,14 +4607,13 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
>         struct r10bio *r10_bio;
>         sector_t next, safe, last;
>         int max_sectors;
> -       int nr_sectors;
>         int s;
>         struct md_rdev *rdev;
>         int need_flush = 0;
>         struct bio *blist;
>         struct bio *bio, *read_bio;
>         int sectors_done = 0;
> -       struct page **pages;
> +       struct folio *folio;
>
>         if (sector_nr == 0) {
>                 /* If restarting in the middle, skip the initial sectors */
> @@ -4741,7 +4729,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
>                 return sectors_done;
>         }
>
> -       read_bio = bio_alloc_bioset(rdev->bdev, RESYNC_PAGES, REQ_OP_READ,
> +       read_bio = bio_alloc_bioset(rdev->bdev, 1, REQ_OP_READ,
>                                     GFP_KERNEL, &mddev->bio_set);
>         read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
>                                + rdev->data_offset);
> @@ -4805,32 +4793,23 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
>                 blist = b;
>         }
>
> -       /* Now add as many pages as possible to all of these bios. */
> +       /* Now add folio to all of these bios. */
>
> -       nr_sectors = 0;
> -       pages = get_resync_pages(r10_bio->devs[0].bio)->pages;
> -       for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) {
> -               struct page *page = pages[s / (PAGE_SIZE >> 9)];
> -               int len = (max_sectors - s) << 9;
> -               if (len > PAGE_SIZE)
> -                       len = PAGE_SIZE;
> -               for (bio = blist; bio ; bio = bio->bi_next) {
> -                       if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
> -                               bio->bi_status = BLK_STS_RESOURCE;
> -                               bio_endio(bio);
> -                               return sectors_done;
> -                       }
> +       folio = get_resync_folio(r10_bio->devs[0].bio)->folio;
> +       for (bio = blist; bio ; bio = bio->bi_next) {
> +               if (WARN_ON(!bio_add_folio(bio, folio, max_sectors, 0))) {
> +                       bio->bi_status = BLK_STS_RESOURCE;
> +                       bio_endio(bio);
> +                       return sectors_done;
>                 }
> -               sector_nr += len >> 9;
> -               nr_sectors += len >> 9;
>         }
> -       r10_bio->sectors = nr_sectors;
> +       r10_bio->sectors = max_sectors >> 9;
>
>         /* Now submit the read */
>         atomic_inc(&r10_bio->remaining);
>         read_bio->bi_next = NULL;
>         submit_bio_noacct(read_bio);
> -       sectors_done += nr_sectors;
> +       sectors_done += max_sectors;
>         if (sector_nr <= last)
>                 goto read_more;
>
> @@ -4932,8 +4911,8 @@ static int handle_reshape_read_error(struct mddev *mddev,
>         struct r10conf *conf = mddev->private;
>         struct r10bio *r10b;
>         int slot = 0;
> -       int idx = 0;
> -       struct page **pages;
> +       int sect = 0;
> +       struct folio *folio;
>
>         r10b = kmalloc(struct_size(r10b, devs, conf->copies), GFP_NOIO);
>         if (!r10b) {
> @@ -4941,8 +4920,8 @@ static int handle_reshape_read_error(struct mddev *mddev,
>                 return -ENOMEM;
>         }
>
> -       /* reshape IOs share pages from .devs[0].bio */
> -       pages = get_resync_pages(r10_bio->devs[0].bio)->pages;
> +       /* reshape IOs share folio from .devs[0].bio */
> +       folio = get_resync_folio(r10_bio->devs[0].bio)->folio;
>
>         r10b->sector = r10_bio->sector;
>         __raid10_find_phys(&conf->prev, r10b);
> @@ -4958,19 +4937,19 @@ static int handle_reshape_read_error(struct mddev *mddev,
>                 while (!success) {
>                         int d = r10b->devs[slot].devnum;
>                         struct md_rdev *rdev = conf->mirrors[d].rdev;
> -                       sector_t addr;
>                         if (rdev == NULL ||
>                             test_bit(Faulty, &rdev->flags) ||
>                             !test_bit(In_sync, &rdev->flags))
>                                 goto failed;
>
> -                       addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
>                         atomic_inc(&rdev->nr_pending);
> -                       success = sync_page_io(rdev,
> -                                              addr,
> -                                              s << 9,
> -                                              pages[idx],
> -                                              REQ_OP_READ, false);
> +                       success = sync_folio_io(rdev,
> +                                               r10b->devs[slot].addr +
> +                                               sect,
> +                                               s << 9,
> +                                               sect << 9,
> +                                               folio,
> +                                               REQ_OP_READ, false);
>                         rdev_dec_pending(rdev, mddev);
>                         if (success)
>                                 break;
> @@ -4989,7 +4968,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
>                         return -EIO;
>                 }
>                 sectors -= s;
> -               idx++;
> +               sect += s;
>         }
>         kfree(r10b);
>         return 0;
> --
> 2.39.2
>

Re: [PATCH 06/15] md/raid1,raid10: use folio for sync path IO

Posted by Li Nan 2 weeks, 5 days ago


在 2026/1/20 23:53, Xiao Ni 写道:
> On Wed, Dec 17, 2025 at 8:11 PM <linan666@huaweicloud.com> wrote:
>>
>> From: Li Nan <linan122@huawei.com>
>>
>> Convert all IO on the sync path to use folios. Rename page-related
>> identifiers to match folio.
>>
>> Retain some now-unnecessary while and for loops to minimize code
>> changes, clean them up in a subsequent patch.
>>
>> Signed-off-by: Li Nan <linan122@huawei.com>
>>
>> -static inline int resync_alloc_pages(struct resync_pages *rp,
>> +static inline int resync_alloc_folio(struct resync_folio *rf,
>>                                       gfp_t gfp_flags)
>>   {
>> -       int i;
>> -
>> -       for (i = 0; i < RESYNC_PAGES; i++) {
>> -               rp->pages[i] = alloc_page(gfp_flags);
>> -               if (!rp->pages[i])
>> -                       goto out_free;
>> -       }
>> +       rf->folio = folio_alloc(gfp_flags, get_order(RESYNC_BLOCK_SIZE));
>> +       if (!rf->folio)
>> +               return -ENOMEM;
> 
> Is it ok to add an error log here? Compare with the multipage
> situation, the possibility of failure will be somewhat higher because
> it needs to alloc a contiguous block of physical memory.
> 

Hi, Xiao

Thanks for your review.

In patch 15 we fall back to a smaller order if sync folio alloc fails.
After that the alloc usually succeeds, so an error log seems noisy. Should
I add a log before the fallback and keep it in patch 15?

>> -static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp,
>> +static void md_bio_reset_resync_folio(struct bio *bio, struct resync_folio *rf,
>>                                 int size)
>>   {
>> -       int idx = 0;
>> -
>>          /* initialize bvec table again */
>>          do {
>> -               struct page *page = resync_fetch_page(rp, idx);
>> -               int len = min_t(int, size, PAGE_SIZE);
>> +               struct folio *folio = resync_fetch_folio(rf);
>> +               int len = min_t(int, size, RESYNC_BLOCK_SIZE);
>>
>> -               if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
>> +               if (WARN_ON(!bio_add_folio(bio, folio, len, 0))) {
> 
> Is it ok to use bio_add_folio(bio, folio, RESYNC_BLOCK_SIZE, 0)
> directly here? It removes `size -= len` below, so it's not useless to
> compare size and RESYNC_BLOCK_SIZE above?
> 

Same as the previous one, the size is no longer a fixed value after
patch 15. I think keeping it here gives better compatibility.

>>          /*
>> -        * Allocate RESYNC_PAGES data pages and attach them to
>> -        * the first bio.
>> +        * Allocate data folio and attach them to the first bio.
> 
> typo error
> s/attach them/attach it/g
> 

I will fix it later. Thanks.

>> @@ -2284,44 +2278,30 @@ static void process_checks(struct r1bio *r1_bio)
>>                  }
>>          r1_bio->read_disk = primary;
>>          for (i = 0; i < conf->raid_disks * 2; i++) {
>> -               int j = 0;
>>                  struct bio *pbio = r1_bio->bios[primary];
>>                  struct bio *sbio = r1_bio->bios[i];
>>                  blk_status_t status = sbio->bi_status;
>> -               struct page **ppages = get_resync_pages(pbio)->pages;
>> -               struct page **spages = get_resync_pages(sbio)->pages;
>> -               struct bio_vec *bi;
>> -               int page_len[RESYNC_PAGES] = { 0 };
>> -               struct bvec_iter_all iter_all;
>> +               struct folio *pfolio = get_resync_folio(pbio)->folio;
>> +               struct folio *sfolio = get_resync_folio(sbio)->folio;
>>
>>                  if (sbio->bi_end_io != end_sync_read)
>>                          continue;
>>                  /* Now we can 'fixup' the error value */
>>                  sbio->bi_status = 0;
>>
>> -               bio_for_each_segment_all(bi, sbio, iter_all)
>> -                       page_len[j++] = bi->bv_len;
>> -
>> -               if (!status) {
>> -                       for (j = vcnt; j-- ; ) {
>> -                               if (memcmp(page_address(ppages[j]),
>> -                                          page_address(spages[j]),
>> -                                          page_len[j]))
>> -                                       break;
>> -                       }
>> -               } else
>> -                       j = 0;
>> -               if (j >= 0)
>> +               if (status || memcmp(folio_address(pfolio),
>> +                                    folio_address(sfolio),
>> +                                    r1_bio->sectors << 9)) {
>>                          atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
>> -               if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
>> -                             && !status)) {
>> -                       /* No need to write to this device. */
>> -                       sbio->bi_end_io = NULL;
>> -                       rdev_dec_pending(conf->mirrors[i].rdev, mddev);
>> -                       continue;
>> +                       if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
>> +                               bio_copy_data(sbio, pbio);
>> +                               continue;
>> +                       }
> 
> The logic is changed here. The original logic:
> 1. read ok, no mismatch: no bio_copy_data
> 2. read ok, mismatch, check: no bio_copy_data
> 3. read ok, mismatch, no check: need bio_copy_data
> 4. read fail: need bio_copy_data
> 
> The 4 is broken.
> 
> How about adding a temporary need_write to make logic more clear?
> 
> something like:
>          if (!status) {
>              int ret = 0;
>              ret = memcmp(folio_address(pfolio),
>                               folio_address(sfolio),
>                               r1_bio->sectors << 9);
>              if (ret) {
>                  atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
>                  if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
>                      need_write = true;
>              }
>          } else
>              need_write = true;
> 
>          if (need_write)
>              bio_copy_data(sbio, pbio);
>          else {
>              /* No need to write to this device. */
>              sbio->bi_end_io = NULL;
>              rdev_dec_pending(conf->mirrors[i].rdev, mddev);
>          }
> 

Nice catch, the read-failure case is indeed broken. I'll fix it in v2.

>>                  }
>>
>> -               bio_copy_data(sbio, pbio);
>> +               /* No need to write to this device. */
>> +               sbio->bi_end_io = NULL;
>> +               rdev_dec_pending(conf->mirrors[i].rdev, mddev);
>>          }
>>   }
>>
>> @@ -3020,24 +2997,24 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
>>                  }
>>
>>                  for (i = 0 ; i < conf->raid_disks * 2; i++) {
>> -                       struct resync_pages *rp;
>> +                       struct resync_folio *rf;
>>
>>                          bio = r1_bio->bios[i];
>> -                       rp = get_resync_pages(bio);
>> +                       rf = get_resync_folio(bio);
>>                          if (bio->bi_end_io) {
>> -                               page = resync_fetch_page(rp, page_idx);
>> +                               folio = resync_fetch_folio(rf);
>>
>>                                  /*
>>                                   * won't fail because the vec table is big
>>                                   * enough to hold all these pages
>>                                   */
> 
> The comments above may not be needed anymore. Because there is only
> one vec in the bio.
> 

I will clean it up.

>> -                               __bio_add_page(bio, page, len, 0);
>> +                               bio_add_folio_nofail(bio, folio, len, 0);
>>                          }
>>                  }
>>                  nr_sectors += len>>9;
>>                  sector_nr += len>>9;
>>                  sync_blocks -= (len>>9);
> 
> These three lines are not needed anymore.
> 

It is cleaned up in later patches. In this patch I only want minimal
changes, just folio API and naming replacements. Do you think I should move
those cleanups into this patch?


>>          /*
>> -        * Allocate RESYNC_PAGES data pages and attach them
>> -        * where needed.
>> +        * Allocate data folio and attach them where needed.
> 
> typo error
> s/attach them/attach it/g
> 

I will fix it in v2.

>>           */
>>          for (j = 0; j < nalloc; j++) {
>>                  struct bio *rbio = r10_bio->devs[j].repl_bio;
>> -               struct resync_pages *rp, *rp_repl;
>> +               struct resync_folio *rf, *rf_repl;
>>
>> -               rp = &rps[j];
>> +               rf = &rfs[j];
>>                  if (rbio)
>> -                       rp_repl = &rps[nalloc + j];
>> +                       rf_repl = &rfs[nalloc + j];
>>
>>                  bio = r10_bio->devs[j].bio;
>>
>>                  if (!j || test_bit(MD_RECOVERY_SYNC,
>>                                     &conf->mddev->recovery)) {
>> -                       if (resync_alloc_pages(rp, gfp_flags))
>> +                       if (resync_alloc_folio(rf, gfp_flags))
>>                                  goto out_free_pages;
> 
> s/out_free_pages/out_free_folio/g
> 

I will fix it in v2.

>>                  } else {
>> -                       memcpy(rp, &rps[0], sizeof(*rp));
>> -                       resync_get_all_pages(rp);
>> +                       memcpy(rf, &rfs[0], sizeof(*rf));
>> +                       resync_get_all_folio(rf);
> 
> Maybe the name resync_get_folio is better?

Agree, I will rename it in v2.

>> @@ -2492,19 +2480,21 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
>>
>>                  rdev = conf->mirrors[dr].rdev;
>>                  addr = r10_bio->devs[0].addr + sect;
>> -               ok = sync_page_io(rdev,
>> -                                 addr,
>> -                                 s << 9,
>> -                                 pages[idx],
>> -                                 REQ_OP_READ, false);
>> +               ok = sync_folio_io(rdev,
>> +                                  addr,
>> +                                  s << 9,
>> +                                  sect << 9,
>> +                                  folio,
>> +                                  REQ_OP_READ, false);
> 
> By the comments at the beginning of fix_recovery_read_error, it needs
> to submit io with a page size unit, right? If so, it still needs to
> use sync_page_io here.
> 

Here 's' is PAGE_SIZE. We just use a 'page' from folio. In patch 10,
I will change it to use logical block size instead, which should be more
reasonable.

>>          /*
>>           * Allow skipping a full rebuild for incremental assembly
>> @@ -3277,7 +3265,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
>>           * with 2 bios in each, that correspond to the bios in the main one.
>>           * In this case, the subordinate r10bios link back through a
>>           * borrowed master_bio pointer, and the counter in the master
>> -        * includes a ref from each subordinate.
>> +        * bio_add_folio includes a ref from each subordinate.
> 
> What's the reason change this? And I don't understand the new version.
> 

It looks like a typo. I will remove it.

> Best Regards
> Xiao

Thanks again for your careful review.

-- 
Thanks,
Nan

Re: [PATCH 06/15] md/raid1,raid10: use folio for sync path IO

Posted by Xiao Ni 2 weeks, 4 days ago

On Thu, Jan 22, 2026 at 10:12 AM Li Nan <linan666@huaweicloud.com> wrote:
>
>
>
> 在 2026/1/20 23:53, Xiao Ni 写道:
> > On Wed, Dec 17, 2025 at 8:11 PM <linan666@huaweicloud.com> wrote:
> >>
> >> From: Li Nan <linan122@huawei.com>
> >>
> >> Convert all IO on the sync path to use folios. Rename page-related
> >> identifiers to match folio.
> >>
> >> Retain some now-unnecessary while and for loops to minimize code
> >> changes, clean them up in a subsequent patch.
> >>
> >> Signed-off-by: Li Nan <linan122@huawei.com>
> >>
> >> -static inline int resync_alloc_pages(struct resync_pages *rp,
> >> +static inline int resync_alloc_folio(struct resync_folio *rf,
> >>                                       gfp_t gfp_flags)
> >>   {
> >> -       int i;
> >> -
> >> -       for (i = 0; i < RESYNC_PAGES; i++) {
> >> -               rp->pages[i] = alloc_page(gfp_flags);
> >> -               if (!rp->pages[i])
> >> -                       goto out_free;
> >> -       }
> >> +       rf->folio = folio_alloc(gfp_flags, get_order(RESYNC_BLOCK_SIZE));
> >> +       if (!rf->folio)
> >> +               return -ENOMEM;
> >
> > Is it ok to add an error log here? Compare with the multipage
> > situation, the possibility of failure will be somewhat higher because
> > it needs to alloc a contiguous block of physical memory.
> >
>
> Hi, Xiao
>
> Thanks for your review.
>
> In patch 15 we fall back to a smaller order if sync folio alloc fails.
> After that the alloc usually succeeds, so an error log seems noisy. Should
> I add a log before the fallback and keep it in patch 15?

Thanks for the explanation. No change is needed here :)

>
> >> -static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp,
> >> +static void md_bio_reset_resync_folio(struct bio *bio, struct resync_folio *rf,
> >>                                 int size)
> >>   {
> >> -       int idx = 0;
> >> -
> >>          /* initialize bvec table again */
> >>          do {
> >> -               struct page *page = resync_fetch_page(rp, idx);
> >> -               int len = min_t(int, size, PAGE_SIZE);
> >> +               struct folio *folio = resync_fetch_folio(rf);
> >> +               int len = min_t(int, size, RESYNC_BLOCK_SIZE);
> >>
> >> -               if (WARN_ON(!bio_add_page(bio, page, len, 0))) {
> >> +               if (WARN_ON(!bio_add_folio(bio, folio, len, 0))) {
> >
> > Is it ok to use bio_add_folio(bio, folio, RESYNC_BLOCK_SIZE, 0)
> > directly here? It removes `size -= len` below, so it's not useless to
> > compare size and RESYNC_BLOCK_SIZE above?
> >
>
> Same as the previous one, the size is no longer a fixed value after
> patch 15. I think keeping it here gives better compatibility.

Thanks for the explanation.

>
> >>          /*
> >> -        * Allocate RESYNC_PAGES data pages and attach them to
> >> -        * the first bio.
> >> +        * Allocate data folio and attach them to the first bio.
> >
> > typo error
> > s/attach them/attach it/g
> >
>
> I will fix it later. Thanks.
>
> >> @@ -2284,44 +2278,30 @@ static void process_checks(struct r1bio *r1_bio)
> >>                  }
> >>          r1_bio->read_disk = primary;
> >>          for (i = 0; i < conf->raid_disks * 2; i++) {
> >> -               int j = 0;
> >>                  struct bio *pbio = r1_bio->bios[primary];
> >>                  struct bio *sbio = r1_bio->bios[i];
> >>                  blk_status_t status = sbio->bi_status;
> >> -               struct page **ppages = get_resync_pages(pbio)->pages;
> >> -               struct page **spages = get_resync_pages(sbio)->pages;
> >> -               struct bio_vec *bi;
> >> -               int page_len[RESYNC_PAGES] = { 0 };
> >> -               struct bvec_iter_all iter_all;
> >> +               struct folio *pfolio = get_resync_folio(pbio)->folio;
> >> +               struct folio *sfolio = get_resync_folio(sbio)->folio;
> >>
> >>                  if (sbio->bi_end_io != end_sync_read)
> >>                          continue;
> >>                  /* Now we can 'fixup' the error value */
> >>                  sbio->bi_status = 0;
> >>
> >> -               bio_for_each_segment_all(bi, sbio, iter_all)
> >> -                       page_len[j++] = bi->bv_len;
> >> -
> >> -               if (!status) {
> >> -                       for (j = vcnt; j-- ; ) {
> >> -                               if (memcmp(page_address(ppages[j]),
> >> -                                          page_address(spages[j]),
> >> -                                          page_len[j]))
> >> -                                       break;
> >> -                       }
> >> -               } else
> >> -                       j = 0;
> >> -               if (j >= 0)
> >> +               if (status || memcmp(folio_address(pfolio),
> >> +                                    folio_address(sfolio),
> >> +                                    r1_bio->sectors << 9)) {
> >>                          atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
> >> -               if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
> >> -                             && !status)) {
> >> -                       /* No need to write to this device. */
> >> -                       sbio->bi_end_io = NULL;
> >> -                       rdev_dec_pending(conf->mirrors[i].rdev, mddev);
> >> -                       continue;
> >> +                       if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
> >> +                               bio_copy_data(sbio, pbio);
> >> +                               continue;
> >> +                       }
> >
> > The logic is changed here. The original logic:
> > 1. read ok, no mismatch: no bio_copy_data
> > 2. read ok, mismatch, check: no bio_copy_data
> > 3. read ok, mismatch, no check: need bio_copy_data
> > 4. read fail: need bio_copy_data
> >
> > The 4 is broken.
> >
> > How about adding a temporary need_write to make logic more clear?
> >
> > something like:
> >          if (!status) {
> >              int ret = 0;
> >              ret = memcmp(folio_address(pfolio),
> >                               folio_address(sfolio),
> >                               r1_bio->sectors << 9);
> >              if (ret) {
> >                  atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
> >                  if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
> >                      need_write = true;
> >              }
> >          } else
> >              need_write = true;
> >
> >          if (need_write)
> >              bio_copy_data(sbio, pbio);
> >          else {
> >              /* No need to write to this device. */
> >              sbio->bi_end_io = NULL;
> >              rdev_dec_pending(conf->mirrors[i].rdev, mddev);
> >          }
> >
>
> Nice catch, the read-failure case is indeed broken. I'll fix it in v2.
>
> >>                  }
> >>
> >> -               bio_copy_data(sbio, pbio);
> >> +               /* No need to write to this device. */
> >> +               sbio->bi_end_io = NULL;
> >> +               rdev_dec_pending(conf->mirrors[i].rdev, mddev);
> >>          }
> >>   }
> >>
> >> @@ -3020,24 +2997,24 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
> >>                  }
> >>
> >>                  for (i = 0 ; i < conf->raid_disks * 2; i++) {
> >> -                       struct resync_pages *rp;
> >> +                       struct resync_folio *rf;
> >>
> >>                          bio = r1_bio->bios[i];
> >> -                       rp = get_resync_pages(bio);
> >> +                       rf = get_resync_folio(bio);
> >>                          if (bio->bi_end_io) {
> >> -                               page = resync_fetch_page(rp, page_idx);
> >> +                               folio = resync_fetch_folio(rf);
> >>
> >>                                  /*
> >>                                   * won't fail because the vec table is big
> >>                                   * enough to hold all these pages
> >>                                   */
> >
> > The comments above may not be needed anymore. Because there is only
> > one vec in the bio.
> >
>
> I will clean it up.
>
> >> -                               __bio_add_page(bio, page, len, 0);
> >> +                               bio_add_folio_nofail(bio, folio, len, 0);
> >>                          }
> >>                  }
> >>                  nr_sectors += len>>9;
> >>                  sector_nr += len>>9;
> >>                  sync_blocks -= (len>>9);
> >
> > These three lines are not needed anymore.
> >
>
> It is cleaned up in later patches. In this patch I only want minimal
> changes, just folio API and naming replacements. Do you think I should move
> those cleanups into this patch?

I think that would be the better choice, but it's really up to you. I'm OK
with it if you prefer to keep your current patch sequence.

>
>
> >>          /*
> >> -        * Allocate RESYNC_PAGES data pages and attach them
> >> -        * where needed.
> >> +        * Allocate data folio and attach them where needed.
> >
> > typo error
> > s/attach them/attach it/g
> >
>
> I will fix it in v2.
>
> >>           */
> >>          for (j = 0; j < nalloc; j++) {
> >>                  struct bio *rbio = r10_bio->devs[j].repl_bio;
> >> -               struct resync_pages *rp, *rp_repl;
> >> +               struct resync_folio *rf, *rf_repl;
> >>
> >> -               rp = &rps[j];
> >> +               rf = &rfs[j];
> >>                  if (rbio)
> >> -                       rp_repl = &rps[nalloc + j];
> >> +                       rf_repl = &rfs[nalloc + j];
> >>
> >>                  bio = r10_bio->devs[j].bio;
> >>
> >>                  if (!j || test_bit(MD_RECOVERY_SYNC,
> >>                                     &conf->mddev->recovery)) {
> >> -                       if (resync_alloc_pages(rp, gfp_flags))
> >> +                       if (resync_alloc_folio(rf, gfp_flags))
> >>                                  goto out_free_pages;
> >
> > s/out_free_pages/out_free_folio/g
> >
>
> I will fix it in v2.
>
> >>                  } else {
> >> -                       memcpy(rp, &rps[0], sizeof(*rp));
> >> -                       resync_get_all_pages(rp);
> >> +                       memcpy(rf, &rfs[0], sizeof(*rf));
> >> +                       resync_get_all_folio(rf);
> >
> > Maybe the name resync_get_folio is better?
>
> Agree, I will rename it in v2.
>
> >> @@ -2492,19 +2480,21 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
> >>
> >>                  rdev = conf->mirrors[dr].rdev;
> >>                  addr = r10_bio->devs[0].addr + sect;
> >> -               ok = sync_page_io(rdev,
> >> -                                 addr,
> >> -                                 s << 9,
> >> -                                 pages[idx],
> >> -                                 REQ_OP_READ, false);
> >> +               ok = sync_folio_io(rdev,
> >> +                                  addr,
> >> +                                  s << 9,
> >> +                                  sect << 9,
> >> +                                  folio,
> >> +                                  REQ_OP_READ, false);
> >
> > > According to the comment at the beginning of fix_recovery_read_error(),
> > > the IO needs to be submitted in page-size units, right? If so, this
> > > still needs to use sync_page_io here.
> >
>
> Here 's' is PAGE_SIZE. We just use a 'page' from folio. In patch 10,
> I will change it to use logical block size instead, which should be more
> reasonable.

Ok.

>
> >>          /*
> >>           * Allow skipping a full rebuild for incremental assembly
> >> @@ -3277,7 +3265,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
> >>           * with 2 bios in each, that correspond to the bios in the main one.
> >>           * In this case, the subordinate r10bios link back through a
> >>           * borrowed master_bio pointer, and the counter in the master
> >> -        * includes a ref from each subordinate.
> >> +        * bio_add_folio includes a ref from each subordinate.
> >
> > > What's the reason for changing this? I don't understand the new version.
> >
>
> It looks like a typo. I will remove it.
>
> > Best Regards
> > Xiao
>
> Thanks again for your careful review.

You're welcome.

Best Regards
Xiao
>
> --
> Thanks,
> Nan
>

Re: [PATCH 06/15] md/raid1,raid10: use folio for sync path IO

Posted by Li Nan 1 month, 2 weeks ago


在 2025/12/17 20:00, linan666@huaweicloud.com 写道:
> From: Li Nan <linan122@huawei.com>
> 
> Convert all IO on the sync path to use folios. Rename page-related
> identifiers to match folio.
> 
> Retain some now-unnecessary while and for loops to minimize code
> changes, clean them up in a subsequent patch.
> 
> Signed-off-by: Li Nan <linan122@huawei.com>
> ---
>   drivers/md/md.c       |   2 +-
>   drivers/md/raid1-10.c |  60 ++++--------
>   drivers/md/raid1.c    | 155 ++++++++++++++-----------------
>   drivers/md/raid10.c   | 207 +++++++++++++++++++-----------------------
>   4 files changed, 179 insertions(+), 245 deletions(-)
> 

This patch is missing the changes to raid1_alloc_init_r1buf() and
raid10_alloc_init_r10buf(). They will be included in v2 along with the other
suggestions.


  static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
  {
         struct r1bio *r1bio = mempool_alloc(&conf->r1buf_pool, GFP_NOIO);
-       struct resync_pages *rps;
+       struct resync_folio *rfs;
         struct bio *bio;
         int i;


  static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
  {
         struct r10bio *r10bio = mempool_alloc(&conf->r10buf_pool, GFP_NOIO);
-       struct rsync_pages *rp;
+       struct resync_folio *rf;
         struct bio *bio;
         int nalloc;
         int i;

-- 
Thanks,
Nan