[PATCH v2 7/7] md/raid10: Handle bio_split() errors

John Garry posted 7 patches 3 weeks, 6 days ago
There is a newer version of this series
[PATCH v2 7/7] md/raid10: Handle bio_split() errors
Posted by John Garry 3 weeks, 6 days ago
Add proper bio_split() error handling. For any error, call
raid_end_bio_io() and return — except for discard, where we end the bio
directly.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/md/raid10.c | 47 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f3bf1116794a..9c56b27b754a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1159,6 +1159,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	int slot = r10_bio->read_slot;
 	struct md_rdev *err_rdev = NULL;
 	gfp_t gfp = GFP_NOIO;
+	int error;
 
 	if (slot >= 0 && r10_bio->devs[slot].rdev) {
 		/*
@@ -1206,6 +1207,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	if (max_sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, max_sectors,
 					      gfp, &conf->bio_split);
+		if (IS_ERR(split)) {
+			error = PTR_ERR(split);
+			goto err_handle;
+		}
 		bio_chain(split, bio);
 		allow_barrier(conf);
 		submit_bio_noacct(bio);
@@ -1236,6 +1241,12 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	mddev_trace_remap(mddev, read_bio, r10_bio->sector);
 	submit_bio_noacct(read_bio);
 	return;
+err_handle:
+	atomic_dec(&rdev->nr_pending);
+
+	bio->bi_status = errno_to_blk_status(error);
+	set_bit(R10BIO_Uptodate, &r10_bio->state);
+	raid_end_bio_io(r10_bio);
 }
 
 static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
@@ -1347,9 +1358,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 				 struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
-	int i;
+	int i, k;
 	sector_t sectors;
 	int max_sectors;
+	int error;
 
 	if ((mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
@@ -1482,6 +1494,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 	if (r10_bio->sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, r10_bio->sectors,
 					      GFP_NOIO, &conf->bio_split);
+		if (IS_ERR(split)) {
+			error = PTR_ERR(split);
+			goto err_handle;
+		}
 		bio_chain(split, bio);
 		allow_barrier(conf);
 		submit_bio_noacct(bio);
@@ -1503,6 +1519,25 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 			raid10_write_one_disk(mddev, r10_bio, bio, true, i);
 	}
 	one_write_done(r10_bio);
+	return;
+err_handle:
+	for (k = 0;  k < i; k++) {
+		struct md_rdev *rdev, *rrdev;
+
+		rdev = conf->mirrors[k].rdev;
+		rrdev = conf->mirrors[k].replacement;
+
+		if (rdev)
+			rdev_dec_pending(conf->mirrors[k].rdev, mddev);
+		if (rrdev)
+			rdev_dec_pending(conf->mirrors[k].rdev, mddev);
+		r10_bio->devs[k].bio = NULL;
+		r10_bio->devs[k].repl_bio = NULL;
+	}
+
+	bio->bi_status = errno_to_blk_status(error);
+	set_bit(R10BIO_Uptodate, &r10_bio->state);
+	raid_end_bio_io(r10_bio);
 }
 
 static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
@@ -1644,6 +1679,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 	if (remainder) {
 		split_size = stripe_size - remainder;
 		split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
+		if (IS_ERR(split)) {
+			bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+			bio_endio(bio);
+			return 0;
+		}
 		bio_chain(split, bio);
 		allow_barrier(conf);
 		/* Resend the fist split part */
@@ -1654,6 +1694,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 	if (remainder) {
 		split_size = bio_sectors(bio) - remainder;
 		split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
+		if (IS_ERR(split)) {
+			bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+			bio_endio(bio);
+			return 0;
+		}
 		bio_chain(split, bio);
 		allow_barrier(conf);
 		/* Resend the second split part */
-- 
2.31.1
Re: [PATCH v2 7/7] md/raid10: Handle bio_split() errors
Posted by Yu Kuai 3 weeks, 5 days ago
Hi,

在 2024/10/28 23:27, John Garry 写道:
> Add proper bio_split() error handling. For any error, call
> raid_end_bio_io() and return. Except for discard, where we end the bio
> directly.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/md/raid10.c | 47 ++++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index f3bf1116794a..9c56b27b754a 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1159,6 +1159,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
>   	int slot = r10_bio->read_slot;
>   	struct md_rdev *err_rdev = NULL;
>   	gfp_t gfp = GFP_NOIO;
> +	int error;
>   
>   	if (slot >= 0 && r10_bio->devs[slot].rdev) {
>   		/*
> @@ -1206,6 +1207,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
>   	if (max_sectors < bio_sectors(bio)) {
>   		struct bio *split = bio_split(bio, max_sectors,
>   					      gfp, &conf->bio_split);
> +		if (IS_ERR(split)) {
> +			error = PTR_ERR(split);
> +			goto err_handle;
> +		}
>   		bio_chain(split, bio);
>   		allow_barrier(conf);
>   		submit_bio_noacct(bio);
> @@ -1236,6 +1241,12 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
>   	mddev_trace_remap(mddev, read_bio, r10_bio->sector);
>   	submit_bio_noacct(read_bio);
>   	return;
> +err_handle:
> +	atomic_dec(&rdev->nr_pending);

I just realized that for the raid1 patch, this is missed. read_balance()
from raid1 will increase nr_pending as well. :(

> +
> +	bio->bi_status = errno_to_blk_status(error);
> +	set_bit(R10BIO_Uptodate, &r10_bio->state);
> +	raid_end_bio_io(r10_bio);
>   }
>   
>   static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
> @@ -1347,9 +1358,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
>   				 struct r10bio *r10_bio)
>   {
>   	struct r10conf *conf = mddev->private;
> -	int i;
> +	int i, k;
>   	sector_t sectors;
>   	int max_sectors;
> +	int error;
>   
>   	if ((mddev_is_clustered(mddev) &&
>   	     md_cluster_ops->area_resyncing(mddev, WRITE,
> @@ -1482,6 +1494,10 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
>   	if (r10_bio->sectors < bio_sectors(bio)) {
>   		struct bio *split = bio_split(bio, r10_bio->sectors,
>   					      GFP_NOIO, &conf->bio_split);
> +		if (IS_ERR(split)) {
> +			error = PTR_ERR(split);
> +			goto err_handle;
> +		}
>   		bio_chain(split, bio);
>   		allow_barrier(conf);
>   		submit_bio_noacct(bio);
> @@ -1503,6 +1519,25 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
>   			raid10_write_one_disk(mddev, r10_bio, bio, true, i);
>   	}
>   	one_write_done(r10_bio);
> +	return;
> +err_handle:
> +	for (k = 0;  k < i; k++) {
> +		struct md_rdev *rdev, *rrdev;
> +
> +		rdev = conf->mirrors[k].rdev;
> +		rrdev = conf->mirrors[k].replacement;

This looks wrong: r10_bio->devs[k].devnum should be used to dereference
rdev from mirrors.
> +
> +		if (rdev)
> +			rdev_dec_pending(conf->mirrors[k].rdev, mddev);
> +		if (rrdev)
> +			rdev_dec_pending(conf->mirrors[k].rdev, mddev);

This is not correct for now: in the case that the rdev has bad blocks (BB)
covering the whole write range, `continue` is reached in the loop and rrdev
is skipped (skipping rrdev doesn't look correct). However, I'd suggest using:

int d = r10_bio->devs[k].devnum;
if (r10_bio->devs[k].bio == NULL)
	rdev_dec_pending(conf->mirrors[d].rdev);
if (r10_bio->devs[k].repl_bio == NULL)
	rdev_dec_pending(conf->mirrors[d].replacement);

Thanks,
Kuai

> +		r10_bio->devs[k].bio = NULL;
> +		r10_bio->devs[k].repl_bio = NULL;
> +	}
> +
> +	bio->bi_status = errno_to_blk_status(error);
> +	set_bit(R10BIO_Uptodate, &r10_bio->state);
> +	raid_end_bio_io(r10_bio);
>   }
>   
>   static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
> @@ -1644,6 +1679,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
>   	if (remainder) {
>   		split_size = stripe_size - remainder;
>   		split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
> +		if (IS_ERR(split)) {
> +			bio->bi_status = errno_to_blk_status(PTR_ERR(split));
> +			bio_endio(bio);
> +			return 0;
> +		}
>   		bio_chain(split, bio);
>   		allow_barrier(conf);
>   		/* Resend the fist split part */
> @@ -1654,6 +1694,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
>   	if (remainder) {
>   		split_size = bio_sectors(bio) - remainder;
>   		split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
> +		if (IS_ERR(split)) {
> +			bio->bi_status = errno_to_blk_status(PTR_ERR(split));
> +			bio_endio(bio);
> +			return 0;
> +		}
>   		bio_chain(split, bio);
>   		allow_barrier(conf);
>   		/* Resend the second split part */
> 

Re: [PATCH v2 7/7] md/raid10: Handle bio_split() errors
Posted by John Garry 3 weeks, 5 days ago
On 29/10/2024 11:55, Yu Kuai wrote:
> Hi,
> 
> 在 2024/10/28 23:27, John Garry 写道:
>> Add proper bio_split() error handling. For any error, call
>> raid_end_bio_io() and return. Except for discard, where we end the bio
>> directly.
>>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>>   drivers/md/raid10.c | 47 ++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 46 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index f3bf1116794a..9c56b27b754a 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -1159,6 +1159,7 @@ static void raid10_read_request(struct mddev 
>> *mddev, struct bio *bio,
>>       int slot = r10_bio->read_slot;
>>       struct md_rdev *err_rdev = NULL;
>>       gfp_t gfp = GFP_NOIO;
>> +    int error;
>>       if (slot >= 0 && r10_bio->devs[slot].rdev) {
>>           /*
>> @@ -1206,6 +1207,10 @@ static void raid10_read_request(struct mddev 
>> *mddev, struct bio *bio,
>>       if (max_sectors < bio_sectors(bio)) {
>>           struct bio *split = bio_split(bio, max_sectors,
>>                             gfp, &conf->bio_split);
>> +        if (IS_ERR(split)) {
>> +            error = PTR_ERR(split);
>> +            goto err_handle;
>> +        }
>>           bio_chain(split, bio);
>>           allow_barrier(conf);
>>           submit_bio_noacct(bio);
>> @@ -1236,6 +1241,12 @@ static void raid10_read_request(struct mddev 
>> *mddev, struct bio *bio,
>>       mddev_trace_remap(mddev, read_bio, r10_bio->sector);
>>       submit_bio_noacct(read_bio);
>>       return;
>> +err_handle:
>> +    atomic_dec(&rdev->nr_pending);
> 
> I just realized that for the raid1 patch, this is missed. read_balance()
> from raid1 will increase nr_pending as well. :(

hmmm... I have the rdev_dec_pending() call for raid1 at the error label, 
which does the appropriate nr_pending dec, right? Or not?

> 
>> +
>> +    bio->bi_status = errno_to_blk_status(error);
>> +    set_bit(R10BIO_Uptodate, &r10_bio->state);
>> +    raid_end_bio_io(r10_bio);
>>   }
>>   static void raid10_write_one_disk(struct mddev *mddev, struct r10bio 
>> *r10_bio,
>> @@ -1347,9 +1358,10 @@ static void raid10_write_request(struct mddev 
>> *mddev, struct bio *bio,
>>                    struct r10bio *r10_bio)
>>   {
>>       struct r10conf *conf = mddev->private;
>> -    int i;
>> +    int i, k;
>>       sector_t sectors;
>>       int max_sectors;
>> +    int error;
>>       if ((mddev_is_clustered(mddev) &&
>>            md_cluster_ops->area_resyncing(mddev, WRITE,
>> @@ -1482,6 +1494,10 @@ static void raid10_write_request(struct mddev 
>> *mddev, struct bio *bio,
>>       if (r10_bio->sectors < bio_sectors(bio)) {
>>           struct bio *split = bio_split(bio, r10_bio->sectors,
>>                             GFP_NOIO, &conf->bio_split);
>> +        if (IS_ERR(split)) {
>> +            error = PTR_ERR(split);
>> +            goto err_handle;
>> +        }
>>           bio_chain(split, bio);
>>           allow_barrier(conf);
>>           submit_bio_noacct(bio);
>> @@ -1503,6 +1519,25 @@ static void raid10_write_request(struct mddev 
>> *mddev, struct bio *bio,
>>               raid10_write_one_disk(mddev, r10_bio, bio, true, i);
>>       }
>>       one_write_done(r10_bio);
>> +    return;
>> +err_handle:
>> +    for (k = 0;  k < i; k++) {
>> +        struct md_rdev *rdev, *rrdev;
>> +
>> +        rdev = conf->mirrors[k].rdev;
>> +        rrdev = conf->mirrors[k].replacement;
> 
> This looks wrong, r10_bio->devs[k].devnum should be used to deference
> rdev from mirrors.

ok

>> +
>> +        if (rdev)
>> +            rdev_dec_pending(conf->mirrors[k].rdev, mddev);
>> +        if (rrdev)
>> +            rdev_dec_pending(conf->mirrors[k].rdev, mddev);
> 
> This is not correct for now, for the case that rdev is all BB in the
> write range, continue will be reached in the loop and rrdev is skipped(
> This doesn't look correct to skip rrdev). However, I'll suggest to use:
> 
> int d = r10_bio->devs[k].devnum;
> if (r10_bio->devs[k].bio == NULL)

eh, should this be:
if (r10_bio->devs[k].bio != NULL)

>      rdev_dec_pending(conf->mirrors[d].rdev);
> if (r10_bio->devs[k].repl_bio == NULL)
>      rdev_dec_pending(conf->mirrors[d].replacement);
> 



> 
>> +        r10_bio->devs[k].bio = NULL;
>> +        r10_bio->devs[k].repl_bio = NULL;
>> +    }
>> +
>> +    bio->bi_status = errno_to_blk_status(error);
>> +    set_bit(R10BIO_Uptodate, &r10_bio->state);
>> +    raid_end_bio_io(r10_bio);

Thanks,
John
Re: [PATCH v2 7/7] md/raid10: Handle bio_split() errors
Posted by Yu Kuai 3 weeks, 5 days ago
Hi,

在 2024/10/29 20:05, John Garry 写道:
> On 29/10/2024 11:55, Yu Kuai wrote:
>> Hi,
>>
>> 在 2024/10/28 23:27, John Garry 写道:
>>> Add proper bio_split() error handling. For any error, call
>>> raid_end_bio_io() and return. Except for discard, where we end the bio
>>> directly.
>>>
>>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>>> ---
>>>   drivers/md/raid10.c | 47 ++++++++++++++++++++++++++++++++++++++++++++-
>>>   1 file changed, 46 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>>> index f3bf1116794a..9c56b27b754a 100644
>>> --- a/drivers/md/raid10.c
>>> +++ b/drivers/md/raid10.c
>>> @@ -1159,6 +1159,7 @@ static void raid10_read_request(struct mddev 
>>> *mddev, struct bio *bio,
>>>       int slot = r10_bio->read_slot;
>>>       struct md_rdev *err_rdev = NULL;
>>>       gfp_t gfp = GFP_NOIO;
>>> +    int error;
>>>       if (slot >= 0 && r10_bio->devs[slot].rdev) {
>>>           /*
>>> @@ -1206,6 +1207,10 @@ static void raid10_read_request(struct mddev 
>>> *mddev, struct bio *bio,
>>>       if (max_sectors < bio_sectors(bio)) {
>>>           struct bio *split = bio_split(bio, max_sectors,
>>>                             gfp, &conf->bio_split);
>>> +        if (IS_ERR(split)) {
>>> +            error = PTR_ERR(split);
>>> +            goto err_handle;
>>> +        }
>>>           bio_chain(split, bio);
>>>           allow_barrier(conf);
>>>           submit_bio_noacct(bio);
>>> @@ -1236,6 +1241,12 @@ static void raid10_read_request(struct mddev 
>>> *mddev, struct bio *bio,
>>>       mddev_trace_remap(mddev, read_bio, r10_bio->sector);
>>>       submit_bio_noacct(read_bio);
>>>       return;
>>> +err_handle:
>>> +    atomic_dec(&rdev->nr_pending);
>>
>> I just realized that for the raid1 patch, this is missed. read_balance()
>> from raid1 will increase nr_pending as well. :(
> 
> hmmm... I have the rdev_dec_pending() call for raid1 at the error label, 
> which does the appropriate nr_pending dec, right? Or not?

Looks not, I'll reply here. :)
> 
>>
>>> +
>>> +    bio->bi_status = errno_to_blk_status(error);
>>> +    set_bit(R10BIO_Uptodate, &r10_bio->state);
>>> +    raid_end_bio_io(r10_bio);
>>>   }
>>>   static void raid10_write_one_disk(struct mddev *mddev, struct 
>>> r10bio *r10_bio,
>>> @@ -1347,9 +1358,10 @@ static void raid10_write_request(struct mddev 
>>> *mddev, struct bio *bio,
>>>                    struct r10bio *r10_bio)
>>>   {
>>>       struct r10conf *conf = mddev->private;
>>> -    int i;
>>> +    int i, k;
>>>       sector_t sectors;
>>>       int max_sectors;
>>> +    int error;
>>>       if ((mddev_is_clustered(mddev) &&
>>>            md_cluster_ops->area_resyncing(mddev, WRITE,
>>> @@ -1482,6 +1494,10 @@ static void raid10_write_request(struct mddev 
>>> *mddev, struct bio *bio,
>>>       if (r10_bio->sectors < bio_sectors(bio)) {
>>>           struct bio *split = bio_split(bio, r10_bio->sectors,
>>>                             GFP_NOIO, &conf->bio_split);
>>> +        if (IS_ERR(split)) {
>>> +            error = PTR_ERR(split);
>>> +            goto err_handle;
>>> +        }
>>>           bio_chain(split, bio);
>>>           allow_barrier(conf);
>>>           submit_bio_noacct(bio);
>>> @@ -1503,6 +1519,25 @@ static void raid10_write_request(struct mddev 
>>> *mddev, struct bio *bio,
>>>               raid10_write_one_disk(mddev, r10_bio, bio, true, i);
>>>       }
>>>       one_write_done(r10_bio);
>>> +    return;
>>> +err_handle:
>>> +    for (k = 0;  k < i; k++) {
>>> +        struct md_rdev *rdev, *rrdev;
>>> +
>>> +        rdev = conf->mirrors[k].rdev;
>>> +        rrdev = conf->mirrors[k].replacement;
>>
>> This looks wrong, r10_bio->devs[k].devnum should be used to deference
>> rdev from mirrors.
> 
> ok
> 
>>> +
>>> +        if (rdev)
>>> +            rdev_dec_pending(conf->mirrors[k].rdev, mddev);
>>> +        if (rrdev)
>>> +            rdev_dec_pending(conf->mirrors[k].rdev, mddev);
>>
>> This is not correct for now, for the case that rdev is all BB in the
>> write range, continue will be reached in the loop and rrdev is skipped(
>> This doesn't look correct to skip rrdev). However, I'll suggest to use:
>>
>> int d = r10_bio->devs[k].devnum;
>> if (r10_bio->devs[k].bio == NULL)
> 
> eh, should this be:
> if (r10_bio->devs[k].bio != NULL)

Of course, sorry about the typo.

Thanks,
Kuai

> 
>>      rdev_dec_pending(conf->mirrors[d].rdev);
>> if (r10_bio->devs[k].repl_bio == NULL)
>>      rdev_dec_pending(conf->mirrors[d].replacement);
>>
> 
> 
> 
>>
>>> +        r10_bio->devs[k].bio = NULL;
>>> +        r10_bio->devs[k].repl_bio = NULL;
>>> +    }
>>> +
>>> +    bio->bi_status = errno_to_blk_status(error);
>>> +    set_bit(R10BIO_Uptodate, &r10_bio->state);
>>> +    raid_end_bio_io(r10_bio);
> 
> Thanks,
> John
> 
> .
>