Set BLK_FEAT_ATOMIC_WRITES_STACKED to enable atomic writes.
For an attempt to atomic write to a region which has bad blocks, error
the write as we just cannot do this. It is unlikely to find devices which
support atomic writes and bad blocks.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/md/raid10.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9c56b27b754a..aacd8c3381f5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1454,6 +1454,13 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
is_bad = is_badblock(rdev, dev_sector, max_sectors,
&first_bad, &bad_sectors);
+
+ if (is_bad && bio->bi_opf & REQ_ATOMIC) {
+ /* We just cannot atomically write this ... */
+ error = -EFAULT;
+ goto err_handle;
+ }
+
if (is_bad && first_bad <= dev_sector) {
/* Cannot write here at all */
bad_sectors -= (dev_sector - first_bad);
@@ -4029,6 +4036,7 @@ static int raid10_set_queue_limits(struct mddev *mddev)
lim.max_write_zeroes_sectors = 0;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
+ lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err) {
queue_limits_cancel_update(mddev->gendisk->queue);
--
2.31.1
Hi John, kernel test robot noticed the following build errors: [auto build test ERROR on axboe-block/for-next] [also build test ERROR on linus/master v6.12-rc5 next-20241030] [cannot apply to song-md/md-next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/John-Garry/block-Add-extra-checks-in-blk_validate_atomic_write_limits/20241030-175428 base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next patch link: https://lore.kernel.org/r/20241030094912.3960234-6-john.g.garry%40oracle.com patch subject: [PATCH v2 5/5] md/raid10: Atomic write support config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20241031/202410311223.WHxXOaS2-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241031/202410311223.WHxXOaS2-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202410311223.WHxXOaS2-lkp@intel.com/ All errors (new ones prefixed by >>): drivers/md/raid10.c: In function 'raid10_write_request': >> drivers/md/raid10.c:1448:33: error: 'error' undeclared (first use in this function); did you mean 'md_error'? 1448 | error = -EFAULT; | ^~~~~ | md_error drivers/md/raid10.c:1448:33: note: each undeclared identifier is reported only once for each function it appears in >> drivers/md/raid10.c:1449:33: error: label 'err_handle' used but not defined 1449 | goto err_handle; | ^~~~ vim +1448 drivers/md/raid10.c 1345 1346 static void raid10_write_request(struct mddev *mddev, struct bio *bio, 1347 struct r10bio *r10_bio) 1348 { 1349 struct r10conf *conf = mddev->private; 1350 int i; 1351 sector_t sectors; 1352 int max_sectors; 1353 1354 if ((mddev_is_clustered(mddev) && 1355 md_cluster_ops->area_resyncing(mddev, WRITE, 1356 bio->bi_iter.bi_sector, 1357 bio_end_sector(bio)))) { 1358 DEFINE_WAIT(w); 1359 /* Bail out if REQ_NOWAIT is set for the bio */ 1360 if (bio->bi_opf & REQ_NOWAIT) { 1361 bio_wouldblock_error(bio); 1362 return; 1363 } 1364 for (;;) { 1365 prepare_to_wait(&conf->wait_barrier, 1366 &w, TASK_IDLE); 1367 if (!md_cluster_ops->area_resyncing(mddev, WRITE, 1368 bio->bi_iter.bi_sector, bio_end_sector(bio))) 1369 break; 1370 schedule(); 1371 } 1372 finish_wait(&conf->wait_barrier, &w); 1373 } 1374 1375 sectors = r10_bio->sectors; 1376 if (!regular_request_wait(mddev, conf, bio, sectors)) 1377 return; 1378 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && 1379 (mddev->reshape_backwards 1380 ? (bio->bi_iter.bi_sector < conf->reshape_safe && 1381 bio->bi_iter.bi_sector + sectors > conf->reshape_progress) 1382 : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe && 1383 bio->bi_iter.bi_sector < conf->reshape_progress))) { 1384 /* Need to update reshape_position in metadata */ 1385 mddev->reshape_position = conf->reshape_progress; 1386 set_mask_bits(&mddev->sb_flags, 0, 1387 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING)); 1388 md_wakeup_thread(mddev->thread); 1389 if (bio->bi_opf & REQ_NOWAIT) { 1390 allow_barrier(conf); 1391 bio_wouldblock_error(bio); 1392 return; 1393 } 1394 mddev_add_trace_msg(conf->mddev, 1395 "raid10 wait reshape metadata"); 1396 wait_event(mddev->sb_wait, 1397 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); 1398 1399 conf->reshape_safe = mddev->reshape_position; 1400 } 1401 1402 /* first select target devices under rcu_lock and 1403 * inc refcount on their rdev. Record them by setting 1404 * bios[x] to bio 1405 * If there are known/acknowledged bad blocks on any device 1406 * on which we have seen a write error, we want to avoid 1407 * writing to those blocks. This potentially requires several 1408 * writes to write around the bad blocks. Each set of writes 1409 * gets its own r10_bio with a set of bios attached. 1410 */ 1411 1412 r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ 1413 raid10_find_phys(conf, r10_bio); 1414 1415 wait_blocked_dev(mddev, r10_bio); 1416 1417 max_sectors = r10_bio->sectors; 1418 1419 for (i = 0; i < conf->copies; i++) { 1420 int d = r10_bio->devs[i].devnum; 1421 struct md_rdev *rdev, *rrdev; 1422 1423 rdev = conf->mirrors[d].rdev; 1424 rrdev = conf->mirrors[d].replacement; 1425 if (rdev && (test_bit(Faulty, &rdev->flags))) 1426 rdev = NULL; 1427 if (rrdev && (test_bit(Faulty, &rrdev->flags))) 1428 rrdev = NULL; 1429 1430 r10_bio->devs[i].bio = NULL; 1431 r10_bio->devs[i].repl_bio = NULL; 1432 1433 if (!rdev && !rrdev) { 1434 set_bit(R10BIO_Degraded, &r10_bio->state); 1435 continue; 1436 } 1437 if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) { 1438 sector_t first_bad; 1439 sector_t dev_sector = r10_bio->devs[i].addr; 1440 int bad_sectors; 1441 int is_bad; 1442 1443 is_bad = is_badblock(rdev, dev_sector, max_sectors, 1444 &first_bad, &bad_sectors); 1445 1446 if (is_bad && bio->bi_opf & REQ_ATOMIC) { 1447 /* We just cannot atomically write this ... */ > 1448 error = -EFAULT; > 1449 goto err_handle; 1450 } 1451 1452 if (is_bad && first_bad <= dev_sector) { 1453 /* Cannot write here at all */ 1454 bad_sectors -= (dev_sector - first_bad); 1455 if (bad_sectors < max_sectors) 1456 /* Mustn't write more than bad_sectors 1457 * to other devices yet 1458 */ 1459 max_sectors = bad_sectors; 1460 /* We don't set R10BIO_Degraded as that 1461 * only applies if the disk is missing, 1462 * so it might be re-added, and we want to 1463 * know to recover this chunk. 1464 * In this case the device is here, and the 1465 * fact that this chunk is not in-sync is 1466 * recorded in the bad block log. 1467 */ 1468 continue; 1469 } 1470 if (is_bad) { 1471 int good_sectors = first_bad - dev_sector; 1472 if (good_sectors < max_sectors) 1473 max_sectors = good_sectors; 1474 } 1475 } 1476 if (rdev) { 1477 r10_bio->devs[i].bio = bio; 1478 atomic_inc(&rdev->nr_pending); 1479 } 1480 if (rrdev) { 1481 r10_bio->devs[i].repl_bio = bio; 1482 atomic_inc(&rrdev->nr_pending); 1483 } 1484 } 1485 1486 if (max_sectors < r10_bio->sectors) 1487 r10_bio->sectors = max_sectors; 1488 1489 if (r10_bio->sectors < bio_sectors(bio)) { 1490 struct bio *split = bio_split(bio, r10_bio->sectors, 1491 GFP_NOIO, &conf->bio_split); 1492 bio_chain(split, bio); 1493 allow_barrier(conf); 1494 submit_bio_noacct(bio); 1495 wait_barrier(conf, false); 1496 bio = split; 1497 r10_bio->master_bio = bio; 1498 } 1499 1500 md_account_bio(mddev, &bio); 1501 r10_bio->master_bio = bio; 1502 atomic_set(&r10_bio->remaining, 1); 1503 mddev->bitmap_ops->startwrite(mddev, r10_bio->sector, r10_bio->sectors, 1504 false); 1505 1506 for (i = 0; i < conf->copies; i++) { 1507 if (r10_bio->devs[i].bio) 1508 raid10_write_one_disk(mddev, r10_bio, bio, false, i); 1509 if (r10_bio->devs[i].repl_bio) 1510 raid10_write_one_disk(mddev, r10_bio, bio, true, i); 1511 } 1512 one_write_done(r10_bio); 1513 } 1514 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
© 2016 - 2024 Red Hat, Inc.