[PATCH v3 03/13] md/raid1,raid10: return actual write status in narrow_write_error

linan666@huaweicloud.com posted 13 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCH v3 03/13] md/raid1,raid10: return actual write status in narrow_write_error
Posted by linan666@huaweicloud.com 1 month, 3 weeks ago
From: Li Nan <linan122@huawei.com>

narrow_write_error() currently returns true when a retried write fails but
setting badblocks succeeds. Instead, return the actual status of all retried
writes, succeeding only when all of them complete successfully. This gives
upper layers accurate information about write outcomes.

When setting badblocks fails, mark the device as faulty and return at once.
No need to continue processing remaining sections in such cases.

Signed-off-by: Li Nan <linan122@huawei.com>
---
 drivers/md/raid1.c  | 17 +++++++++--------
 drivers/md/raid10.c | 15 +++++++++------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 90ad9455f74a..9ffa3ab0fdcc 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2541,11 +2541,15 @@ static bool narrow_write_error(struct r1bio *r1_bio, int i)
 		bio_trim(wbio, sector - r1_bio->sector, sectors);
 		wbio->bi_iter.bi_sector += rdev->data_offset;
 
-		if (submit_bio_wait(wbio) < 0)
+		if (submit_bio_wait(wbio)) {
 			/* failure! */
-			ok = rdev_set_badblocks(rdev, sector,
-						sectors, 0)
-				&& ok;
+			ok = false;
+			if (!rdev_set_badblocks(rdev, sector, sectors, 0)) {
+				md_error(mddev, rdev);
+				bio_put(wbio);
+				break;
+			}
+		}
 
 		bio_put(wbio);
 		sect_to_write -= sectors;
@@ -2596,10 +2600,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
 			 * errors.
 			 */
 			fail = true;
-			if (!narrow_write_error(r1_bio, m))
-				md_error(conf->mddev,
-					 conf->mirrors[m].rdev);
-				/* an I/O failed, we can't clear the bitmap */
+			narrow_write_error(r1_bio, m);
 			rdev_dec_pending(conf->mirrors[m].rdev,
 					 conf->mddev);
 		}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 40c31c00dc60..21a347c4829b 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2820,11 +2820,15 @@ static bool narrow_write_error(struct r10bio *r10_bio, int i)
 				   choose_data_offset(r10_bio, rdev);
 		wbio->bi_opf = REQ_OP_WRITE;
 
-		if (submit_bio_wait(wbio) < 0)
+		if (submit_bio_wait(wbio)) {
 			/* Failure! */
-			ok = rdev_set_badblocks(rdev, wsector,
-						sectors, 0)
-				&& ok;
+			ok = false;
+			if (!rdev_set_badblocks(rdev, wsector, sectors, 0)) {
+				md_error(mddev, rdev);
+				bio_put(wbio);
+				break;
+			}
+		}
 
 		bio_put(wbio);
 		sect_to_write -= sectors;
@@ -2936,8 +2940,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 				rdev_dec_pending(rdev, conf->mddev);
 			} else if (bio != NULL && bio->bi_status) {
 				fail = true;
-				if (!narrow_write_error(r10_bio, m))
-					md_error(conf->mddev, rdev);
+				narrow_write_error(r10_bio, m);
 				rdev_dec_pending(rdev, conf->mddev);
 			}
 			bio = r10_bio->devs[m].repl_bio;
-- 
2.39.2
Re: [PATCH v3 03/13] md/raid1,raid10: return actual write status in narrow_write_error
Posted by Yu Kuai 1 month, 1 week ago
Hi,

在 2025/12/15 11:04, linan666@huaweicloud.com 写道:
> From: Li Nan <linan122@huawei.com>
>
> narrow_write_error() currently returns true when setting badblocks fails.
> Instead, return actual status of all retried writes, succeeding only when
> all retried writes complete successfully. This gives upper layers accurate
> information about write outcomes.
>
> When setting badblocks fails, mark the device as faulty and return at once.
> No need to continue processing remaining sections in such cases.
>
> Signed-off-by: Li Nan <linan122@huawei.com>
> ---
>   drivers/md/raid1.c  | 17 +++++++++--------
>   drivers/md/raid10.c | 15 +++++++++------
>   2 files changed, 18 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 90ad9455f74a..9ffa3ab0fdcc 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -2541,11 +2541,15 @@ static bool narrow_write_error(struct r1bio *r1_bio, int i)
>   		bio_trim(wbio, sector - r1_bio->sector, sectors);
>   		wbio->bi_iter.bi_sector += rdev->data_offset;
>   
> -		if (submit_bio_wait(wbio) < 0)
> +		if (submit_bio_wait(wbio)) {
>   			/* failure! */
> -			ok = rdev_set_badblocks(rdev, sector,
> -						sectors, 0)
> -				&& ok;
> +			ok = false;
> +			if (!rdev_set_badblocks(rdev, sector, sectors, 0)) {
> +				md_error(mddev, rdev);
> +				bio_put(wbio);
> +				break;
> +			}
> +		}
>   
>   		bio_put(wbio);
>   		sect_to_write -= sectors;
> @@ -2596,10 +2600,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
>   			 * errors.
>   			 */
>   			fail = true;
> -			if (!narrow_write_error(r1_bio, m))
> -				md_error(conf->mddev,
> -					 conf->mirrors[m].rdev);
> -				/* an I/O failed, we can't clear the bitmap */
> +			narrow_write_error(r1_bio, m);

I remember asking you to change this helper to return void.

>   			rdev_dec_pending(conf->mirrors[m].rdev,
>   					 conf->mddev);
>   		}
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 40c31c00dc60..21a347c4829b 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -2820,11 +2820,15 @@ static bool narrow_write_error(struct r10bio *r10_bio, int i)
>   				   choose_data_offset(r10_bio, rdev);
>   		wbio->bi_opf = REQ_OP_WRITE;
>   
> -		if (submit_bio_wait(wbio) < 0)
> +		if (submit_bio_wait(wbio)) {
>   			/* Failure! */
> -			ok = rdev_set_badblocks(rdev, wsector,
> -						sectors, 0)
> -				&& ok;
> +			ok = false;
> +			if (!rdev_set_badblocks(rdev, wsector, sectors, 0)) {
> +				md_error(mddev, rdev);
> +				bio_put(wbio);
> +				break;
> +			}
> +		}
>   
>   		bio_put(wbio);
>   		sect_to_write -= sectors;
> @@ -2936,8 +2940,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
>   				rdev_dec_pending(rdev, conf->mddev);
>   			} else if (bio != NULL && bio->bi_status) {
>   				fail = true;
> -				if (!narrow_write_error(r10_bio, m))
> -					md_error(conf->mddev, rdev);
> +				narrow_write_error(r10_bio, m);
>   				rdev_dec_pending(rdev, conf->mddev);
>   			}
>   			bio = r10_bio->devs[m].repl_bio;

-- 
Thanks,
Kuai
Re: [PATCH v3 03/13] md/raid1,raid10: return actual write status in narrow_write_error
Posted by Yu Kuai 1 month, 1 week ago
Hi,

在 2026/1/3 17:37, Yu Kuai 写道:
> Hi,
>
> 在 2025/12/15 11:04, linan666@huaweicloud.com 写道:
>> From: Li Nan <linan122@huawei.com>
>>
>> narrow_write_error() currently returns true when setting badblocks fails.

I think you mean: when the rewrite fails and setting badblocks succeeds.

>> Instead, return actual status of all retried writes, succeeding only when
>> all retried writes complete successfully. This gives upper layers accurate
>> information about write outcomes.
>>
>> When setting badblocks fails, mark the device as faulty and return at once.
>> No need to continue processing remaining sections in such cases.
>>
>> Signed-off-by: Li Nan <linan122@huawei.com>
>> ---
>>    drivers/md/raid1.c  | 17 +++++++++--------
>>    drivers/md/raid10.c | 15 +++++++++------
>>    2 files changed, 18 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>> index 90ad9455f74a..9ffa3ab0fdcc 100644
>> --- a/drivers/md/raid1.c
>> +++ b/drivers/md/raid1.c
>> @@ -2541,11 +2541,15 @@ static bool narrow_write_error(struct r1bio *r1_bio, int i)
>>    		bio_trim(wbio, sector - r1_bio->sector, sectors);
>>    		wbio->bi_iter.bi_sector += rdev->data_offset;
>>    
>> -		if (submit_bio_wait(wbio) < 0)
>> +		if (submit_bio_wait(wbio)) {
>>    			/* failure! */
>> -			ok = rdev_set_badblocks(rdev, sector,
>> -						sectors, 0)
>> -				&& ok;
>> +			ok = false;
>> +			if (!rdev_set_badblocks(rdev, sector, sectors, 0)) {
>> +				md_error(mddev, rdev);
>> +				bio_put(wbio);
>> +				break;
>> +			}
>> +		}
>>    
>>    		bio_put(wbio);
>>    		sect_to_write -= sectors;
>> @@ -2596,10 +2600,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
>>    			 * errors.
>>    			 */
>>    			fail = true;
>> -			if (!narrow_write_error(r1_bio, m))
>> -				md_error(conf->mddev,
>> -					 conf->mirrors[m].rdev);
>> -				/* an I/O failed, we can't clear the bitmap */
>> +			narrow_write_error(r1_bio, m);
> I remembered that I said please change this helper to void.

Looking back at this after patch 4, I still think this helper should return void.

>
>>    			rdev_dec_pending(conf->mirrors[m].rdev,
>>    					 conf->mddev);
>>    		}
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index 40c31c00dc60..21a347c4829b 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -2820,11 +2820,15 @@ static bool narrow_write_error(struct r10bio *r10_bio, int i)
>>    				   choose_data_offset(r10_bio, rdev);
>>    		wbio->bi_opf = REQ_OP_WRITE;
>>    
>> -		if (submit_bio_wait(wbio) < 0)
>> +		if (submit_bio_wait(wbio)) {
>>    			/* Failure! */
>> -			ok = rdev_set_badblocks(rdev, wsector,
>> -						sectors, 0)
>> -				&& ok;
>> +			ok = false;
>> +			if (!rdev_set_badblocks(rdev, wsector, sectors, 0)) {
>> +				md_error(mddev, rdev);
>> +				bio_put(wbio);
>> +				break;
>> +			}
>> +		}
>>    
>>    		bio_put(wbio);
>>    		sect_to_write -= sectors;
>> @@ -2936,8 +2940,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
>>    				rdev_dec_pending(rdev, conf->mddev);
>>    			} else if (bio != NULL && bio->bi_status) {
>>    				fail = true;
>> -				if (!narrow_write_error(r10_bio, m))
>> -					md_error(conf->mddev, rdev);
>> +				narrow_write_error(r10_bio, m);
>>    				rdev_dec_pending(rdev, conf->mddev);
>>    			}
>>    			bio = r10_bio->devs[m].repl_bio;

-- 
Thanks,
Kuai