[PATCH v4 08/19] ublk: move offset check out of __ublk_check_and_get_req()

Caleb Sander Mateos posted 19 patches 4 weeks, 1 day ago
[PATCH v4 08/19] ublk: move offset check out of __ublk_check_and_get_req()
Posted by Caleb Sander Mateos 4 weeks, 1 day ago
__ublk_check_and_get_req() checks that the passed in offset is within
the data length of the specified ublk request. However, only user copy
(ublk_user_copy()) supports accessing ublk request data at a
nonzero offset. Zero-copy buffer registration (ublk_register_io_buf())
always passes 0 for the offset, so the check is unnecessary. Move the
check from __ublk_check_and_get_req() to ublk_user_copy().

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/block/ublk_drv.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index e7697dc4a812..8eefb838b563 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -253,11 +253,11 @@ struct ublk_params_header {
 
 static void ublk_io_release(void *priv);
 static void ublk_stop_dev_unlocked(struct ublk_device *ub);
 static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq);
 static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
-		u16 q_id, u16 tag, struct ublk_io *io, size_t offset);
+		u16 q_id, u16 tag, struct ublk_io *io);
 static inline unsigned int ublk_req_build_flags(struct request *req);
 
 static void ublk_partition_scan_work(struct work_struct *work)
 {
 	struct ublk_device *ub =
@@ -2288,11 +2288,11 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd,
 	int ret;
 
 	if (!ublk_dev_support_zero_copy(ub))
 		return -EINVAL;
 
-	req = __ublk_check_and_get_req(ub, q_id, tag, io, 0);
+	req = __ublk_check_and_get_req(ub, q_id, tag, io);
 	if (!req)
 		return -EINVAL;
 
 	ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index,
 				      issue_flags);
@@ -2582,11 +2582,11 @@ static int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
 			__func__, cmd_op, tag, ret, io ? io->flags : 0);
 	return ret;
 }
 
 static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
-		u16 q_id, u16 tag, struct ublk_io *io, size_t offset)
+		u16 q_id, u16 tag, struct ublk_io *io)
 {
 	struct request *req;
 
 	/*
 	 * can't use io->req in case of concurrent UBLK_IO_COMMIT_AND_FETCH_REQ,
@@ -2603,13 +2603,10 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
 		goto fail_put;
 
 	if (!ublk_rq_has_data(req))
 		goto fail_put;
 
-	if (offset > blk_rq_bytes(req))
-		goto fail_put;
-
 	return req;
 fail_put:
 	ublk_put_req_ref(io, req);
 	return NULL;
 }
@@ -2687,14 +2684,19 @@ ublk_user_copy(struct kiocb *iocb, struct iov_iter *iter, int dir)
 
 	if (tag >= ub->dev_info.queue_depth)
 		return -EINVAL;
 
 	io = &ubq->ios[tag];
-	req = __ublk_check_and_get_req(ub, q_id, tag, io, buf_off);
+	req = __ublk_check_and_get_req(ub, q_id, tag, io);
 	if (!req)
 		return -EINVAL;
 
+	if (buf_off > blk_rq_bytes(req)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	if (!ublk_check_ubuf_dir(req, dir)) {
 		ret = -EACCES;
 		goto out;
 	}
 
-- 
2.45.2
Re: [PATCH v4 08/19] ublk: move offset check out of __ublk_check_and_get_req()
Posted by Alexander Atanasov 3 weeks, 4 days ago
On 8.01.26 11:19, Caleb Sander Mateos wrote:
> __ublk_check_and_get_req() checks that the passed in offset is within
> the data length of the specified ublk request. However, only user copy
> (ublk_check_and_get_req()) supports accessing ublk request data at a
> nonzero offset. Zero-copy buffer registration (ublk_register_io_buf())
> always passes 0 for the offset, so the check is unnecessary. Move the
> check from __ublk_check_and_get_req() to ublk_check_and_get_req().
> 
> Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
> Reviewed-by: Ming Lei <ming.lei@redhat.com>
> ---
>   drivers/block/ublk_drv.c | 16 +++++++++-------
>   1 file changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index e7697dc4a812..8eefb838b563 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -253,11 +253,11 @@ struct ublk_params_header {
>   

[snip]

> @@ -2603,13 +2603,10 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
>   		goto fail_put;
>   
>   	if (!ublk_rq_has_data(req))
>   		goto fail_put;
>   
> -	if (offset > blk_rq_bytes(req))
> -		goto fail_put;
> -
>   	return req;
>   fail_put:
>   	ublk_put_req_ref(io, req);
>   	return NULL;
>   }
> @@ -2687,14 +2684,19 @@ ublk_user_copy(struct kiocb *iocb, struct iov_iter *iter, int dir)
>   
>   	if (tag >= ub->dev_info.queue_depth)
>   		return -EINVAL;
>   
>   	io = &ubq->ios[tag];
> -	req = __ublk_check_and_get_req(ub, q_id, tag, io, buf_off);
> +	req = __ublk_check_and_get_req(ub, q_id, tag, io);
>   	if (!req)
>   		return -EINVAL;
>   
> +	if (buf_off > blk_rq_bytes(req)) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +

The offset is zero-based while blk_rq_bytes() is a byte count, so the
comparison should be >= here.

It will work this way, but for buf_off == blk_rq_bytes(req) the user
will get 0 instead of -EINVAL.

static size_t ublk_copy_user_pages(const struct request *req,
                 unsigned offset, struct iov_iter *uiter, int dir)
{
	size_t done = 0;
...
         rq_for_each_segment(bv, req, iter) {
...
                 if (offset >= bv.bv_len) {
                         offset -= bv.bv_len; // bv_len is same as blk_rq_bytes(req)
                         continue; // this breaks the loop when ==
                 }
...
	}
	return done; // done is never incremented
}

>   	if (!ublk_check_ubuf_dir(req, dir)) {
>   		ret = -EACCES;
>   		goto out;
>   	}


-- 
have fun,
alex
Re: [PATCH v4 08/19] ublk: move offset check out of __ublk_check_and_get_req()
Posted by Caleb Sander Mateos 3 weeks, 4 days ago
On Mon, Jan 12, 2026 at 10:17 AM Alexander Atanasov <alex@zazolabs.com> wrote:
>
> On 8.01.26 11:19, Caleb Sander Mateos wrote:
> > __ublk_check_and_get_req() checks that the passed in offset is within
> > the data length of the specified ublk request. However, only user copy
> > (ublk_check_and_get_req()) supports accessing ublk request data at a
> > nonzero offset. Zero-copy buffer registration (ublk_register_io_buf())
> > always passes 0 for the offset, so the check is unnecessary. Move the
> > check from __ublk_check_and_get_req() to ublk_check_and_get_req().
> >
> > Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
> > Reviewed-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >   drivers/block/ublk_drv.c | 16 +++++++++-------
> >   1 file changed, 9 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> > index e7697dc4a812..8eefb838b563 100644
> > --- a/drivers/block/ublk_drv.c
> > +++ b/drivers/block/ublk_drv.c
> > @@ -253,11 +253,11 @@ struct ublk_params_header {
> >
>
> [snip]
>
> > @@ -2603,13 +2603,10 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
> >               goto fail_put;
> >
> >       if (!ublk_rq_has_data(req))
> >               goto fail_put;
> >
> > -     if (offset > blk_rq_bytes(req))
> > -             goto fail_put;
> > -
> >       return req;
> >   fail_put:
> >       ublk_put_req_ref(io, req);
> >       return NULL;
> >   }
> > @@ -2687,14 +2684,19 @@ ublk_user_copy(struct kiocb *iocb, struct iov_iter *iter, int dir)
> >
> >       if (tag >= ub->dev_info.queue_depth)
> >               return -EINVAL;
> >
> >       io = &ubq->ios[tag];
> > -     req = __ublk_check_and_get_req(ub, q_id, tag, io, buf_off);
> > +     req = __ublk_check_and_get_req(ub, q_id, tag, io);
> >       if (!req)
> >               return -EINVAL;
> >
> > +     if (buf_off > blk_rq_bytes(req)) {
> > +             ret = -EINVAL;
> > +             goto out;
> > +     }
> > +
>
> Offset is zero based, bytes are count so it should be >= here.
>
> It will work this way but for buf_off == blk_rq_bytes(req) user will get
> 0 instead of EINVAL.

This is the existing behavior in __ublk_check_and_get_req(). I agree
allowing buf_off == blk_rq_bytes(req) seems odd, but changing it now
could break ublk servers relying on the current behavior.

Best,
Caleb

>
> static size_t ublk_copy_user_pages(const struct request *req,
>                  unsigned offset, struct iov_iter *uiter, int dir)
> {
>         size_t done = 0;
> ...
>          rq_for_each_segment(bv, req, iter) {
> ...
>                  if (offset >= bv.bv_len) {
>                          offset -= bv.bv_len; // bv_len is same as
> blk_rq_bytes(req)
>                          continue; // this breaks the loop when ==
>                  }
> ...
>         }
>         return done; // done is never incremented
> }
>
> >       if (!ublk_check_ubuf_dir(req, dir)) {
> >               ret = -EACCES;
> >               goto out;
> >       }
>
>
> --
> have fun,
> alex
>
Re: [PATCH v4 08/19] ublk: move offset check out of __ublk_check_and_get_req()
Posted by Alexander Atanasov 3 weeks, 4 days ago
On 12.01.26 20:29, Caleb Sander Mateos wrote:
> On Mon, Jan 12, 2026 at 10:17 AM Alexander Atanasov <alex@zazolabs.com> wrote:
>>
>> On 8.01.26 11:19, Caleb Sander Mateos wrote:
>>> __ublk_check_and_get_req() checks that the passed in offset is within
>>> the data length of the specified ublk request. However, only user copy
>>> (ublk_check_and_get_req()) supports accessing ublk request data at a
>>> nonzero offset. Zero-copy buffer registration (ublk_register_io_buf())
>>> always passes 0 for the offset, so the check is unnecessary. Move the
>>> check from __ublk_check_and_get_req() to ublk_check_and_get_req().
>>>
>>> Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
>>> Reviewed-by: Ming Lei <ming.lei@redhat.com>
>>> ---
>>>    drivers/block/ublk_drv.c | 16 +++++++++-------
>>>    1 file changed, 9 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
>>> index e7697dc4a812..8eefb838b563 100644
>>> --- a/drivers/block/ublk_drv.c
>>> +++ b/drivers/block/ublk_drv.c
>>> @@ -253,11 +253,11 @@ struct ublk_params_header {
>>>
>>
>> [snip]
>>
>>> @@ -2603,13 +2603,10 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
>>>                goto fail_put;
>>>
>>>        if (!ublk_rq_has_data(req))
>>>                goto fail_put;
>>>
>>> -     if (offset > blk_rq_bytes(req))
>>> -             goto fail_put;
>>> -
>>>        return req;
>>>    fail_put:
>>>        ublk_put_req_ref(io, req);
>>>        return NULL;
>>>    }
>>> @@ -2687,14 +2684,19 @@ ublk_user_copy(struct kiocb *iocb, struct iov_iter *iter, int dir)
>>>
>>>        if (tag >= ub->dev_info.queue_depth)
>>>                return -EINVAL;
>>>
>>>        io = &ubq->ios[tag];
>>> -     req = __ublk_check_and_get_req(ub, q_id, tag, io, buf_off);
>>> +     req = __ublk_check_and_get_req(ub, q_id, tag, io);
>>>        if (!req)
>>>                return -EINVAL;
>>>
>>> +     if (buf_off > blk_rq_bytes(req)) {
>>> +             ret = -EINVAL;
>>> +             goto out;
>>> +     }
>>> +
>>
>> Offset is zero based, bytes are count so it should be >= here.
>>
>> It will work this way but for buf_off == blk_rq_bytes(req) user will get
>> 0 instead of EINVAL.
> 
> This is the existing behavior in __ublk_check_and_get_req(). I agree
> allowing buf_off == blk_rq_bytes(req) seems odd, but changing it now
> could break ublk servers relying on the current behavior.


I saw that it came from the existing version, but I doubt that any
existing server relies on this. In general, no code expects to get EOF
from a block device. It is a user error, a classic off-by-one, to give an
offset equal to the end. If the server has sane error handling, it should
either detect that it has a bug and fix it, or not care at all and work
as expected.

The usual pattern is variation of:

while (left > 0) {
     ret = read|write(buf+offset, ....);
     if (ret < 0) goto err;
     left -= ret;
     offset += ret;
}

When the call keeps returning 0, neither left nor offset changes, so this
gets into a nice infinite loop. I have actually hit this kind of bug in
other unrelated code inside the kernel - I guess it is present in the
original code this is based on.

For example there is/was a case in ext4 that initially returned 0 for a 
write in some edge case but that was changed to return a proper -EAGAIN 
later on iirc to avoid such confusion.

So, if some standard does not require it to be like this,
it might be worth considering changing it.


> Best,
> Caleb
> 
>>
>> static size_t ublk_copy_user_pages(const struct request *req,
>>                   unsigned offset, struct iov_iter *uiter, int dir)
>> {
>>          size_t done = 0;
>> ...
>>           rq_for_each_segment(bv, req, iter) {
>> ...
>>                   if (offset >= bv.bv_len) {
>>                           offset -= bv.bv_len; // bv_len is same as
>> blk_rq_bytes(req)
>>                           continue; // this breaks the loop when ==
>>                   }
>> ...
>>          }
>>          return done; // done is never incremented
>> }
>>
>>>        if (!ublk_check_ubuf_dir(req, dir)) {
>>>                ret = -EACCES;
>>>                goto out;
>>>        }
>>
>>
>> --
>> have fun,
>> alex
>>

-- 
have fun,
alex