[PATCH] scsi: bsg: copy uring_cmd payload to prevent double-fetch from shared SQE

Rahul Chandelkar posted 1 patch 1 week, 5 days ago
drivers/scsi/scsi_bsg.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
[PATCH] scsi: bsg: copy uring_cmd payload to prevent double-fetch from shared SQE
Posted by Rahul Chandelkar 1 week, 5 days ago
scsi_bsg_uring_cmd() and scsi_bsg_map_user_buffer() read bsg_uring_cmd
fields directly from the shared mmap'd io_uring submission ring via
io_uring_sqe128_cmd().  On the inline execution path, io_uring has not
yet copied the SQE to kernel memory, so a concurrent userspace thread
can modify fields between reads.

cmd->request_len is read for the bounds check, for the cmd_len
assignment, and for the copy_from_user length.  A racing thread can
change request_len between the bounds check (passes with <= 32) and
copy_from_user (uses the enlarged value), overflowing the 32-byte
scmd->cmnd[] buffer into subsequent struct scsi_cmnd fields.

scsi_bsg_map_user_buffer() independently re-derives its cmd pointer
from the same shared SQE, re-reading dout_xfer_len, din_xfer_len,
dout_xferp, and din_xferp, enabling direction confusion and buffer
length races.

Copy struct bsg_uring_cmd to a stack-local variable before use in both
functions.  The pointer variable 'cmd' is redirected to the local copy
so the rest of each function is unchanged.

Tested with KASAN on QEMU (virtio-scsi, 2 vCPUs).  Without this fix,
a two-thread race produces:

  BUG: KASAN: wild-memory-access in scsi_queue_rq+0x4a3/0x58a0
  Write of size 96 at addr dead000000001000 by task poc/67
  Call Trace:
   kasan_report+0xce/0x100
   __asan_memset+0x23/0x50
   scsi_queue_rq+0x4a3/0x58a0
   scsi_bsg_uring_cmd+0x942/0x1570
   io_uring_cmd+0x2f6/0x950
   io_issue_sqe+0xe5/0x22d0

Fixes: 7b6d3255e7f8 ("scsi: bsg: add io_uring passthrough handler")
Cc: stable@vger.kernel.org
Signed-off-by: Rahul Chandelkar <rc@rexion.ai>
---
 drivers/scsi/scsi_bsg.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/scsi_bsg.c b/drivers/scsi/scsi_bsg.c
index e80dec53174e..244740655eb0 100644
--- a/drivers/scsi/scsi_bsg.c
+++ b/drivers/scsi/scsi_bsg.c
@@ -78,13 +78,21 @@ static int scsi_bsg_map_user_buffer(struct request *req,
 				    struct io_uring_cmd *ioucmd,
 				    unsigned int issue_flags, gfp_t gfp_mask)
 {
-	const struct bsg_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
-	bool is_write = cmd->dout_xfer_len > 0;
-	u64 buf_addr = is_write ? cmd->dout_xferp : cmd->din_xferp;
-	unsigned long buf_len = is_write ? cmd->dout_xfer_len : cmd->din_xfer_len;
+	struct bsg_uring_cmd local_cmd;
+	const struct bsg_uring_cmd *cmd;
+	bool is_write;
+	u64 buf_addr;
+	unsigned long buf_len;
 	struct iov_iter iter;
 	int ret;
 
+	memcpy(&local_cmd, io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd),
+	       sizeof(local_cmd));
+	cmd = &local_cmd;
+	is_write = cmd->dout_xfer_len > 0;
+	buf_addr = is_write ? cmd->dout_xferp : cmd->din_xferp;
+	buf_len = is_write ? cmd->dout_xfer_len : cmd->din_xfer_len;
+
 	if (ioucmd->flags & IORING_URING_CMD_FIXED) {
 		ret = io_uring_cmd_import_fixed(buf_addr, buf_len,
 						is_write ? WRITE : READ,
@@ -104,13 +112,18 @@ static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *iouc
 			       unsigned int issue_flags, bool open_for_write)
 {
 	struct scsi_bsg_uring_cmd_pdu *pdu = scsi_bsg_uring_cmd_pdu(ioucmd);
-	const struct bsg_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
+	struct bsg_uring_cmd local_cmd;
+	const struct bsg_uring_cmd *cmd;
 	struct scsi_cmnd *scmd;
 	struct request *req;
 	blk_mq_req_flags_t blk_flags = 0;
 	gfp_t gfp_mask = GFP_KERNEL;
 	int ret;
 
+	memcpy(&local_cmd, io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd),
+	       sizeof(local_cmd));
+	cmd = &local_cmd;
+
 	if (cmd->protocol != BSG_PROTOCOL_SCSI ||
 	    cmd->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
 		return -EINVAL;
-- 
2.54.0
Re: [PATCH] scsi: bsg: copy uring_cmd payload to prevent double-fetch from shared SQE
Posted by Jens Axboe 1 week, 5 days ago
On 5/27/26 4:59 AM, Rahul Chandelkar wrote:
> scsi_bsg_uring_cmd() and scsi_bsg_map_user_buffer() read bsg_uring_cmd
> fields directly from the shared mmap'd io_uring submission ring via
> io_uring_sqe128_cmd().  On the inline execution path, io_uring has not
> yet copied the SQE to kernel memory, so a concurrent userspace thread
> can modify fields between reads.
> 
> cmd->request_len is read for the bounds check, for the cmd_len
> assignment, and for the copy_from_user length.  A racing thread can
> change request_len between the bounds check (passes with <= 32) and
> copy_from_user (uses the enlarged value), overflowing the 32-byte
> scmd->cmnd[] buffer into subsequent struct scsi_cmnd fields.
> 
> scsi_bsg_map_user_buffer() independently re-derives its cmd pointer
> from the same shared SQE, re-reading dout_xfer_len, din_xfer_len,
> dout_xferp, and din_xferp, enabling direction confusion and buffer
> length races.
> 
> Copy struct bsg_uring_cmd to a stack-local variable before use in both
> functions.  The pointer variable 'cmd' is redirected to the local copy
> so the rest of each function is unchanged.
> 
> Tested with KASAN on QEMU (virtio-scsi, 2 vCPUs).  Without this fix,
> a two-thread race produces:
> 
>   BUG: KASAN: wild-memory-access in scsi_queue_rq+0x4a3/0x58a0
>   Write of size 96 at addr dead000000001000 by task poc/67
>   Call Trace:
>    kasan_report+0xce/0x100
>    __asan_memset+0x23/0x50
>    scsi_queue_rq+0x4a3/0x58a0
>    scsi_bsg_uring_cmd+0x942/0x1570
>    io_uring_cmd+0x2f6/0x950
>    io_issue_sqe+0xe5/0x22d0

I don't think this is the right way to fix it, ->sqe should've been
stable upfront if this ends up happening. Can you share your poc with
me? Your trace has been trimmed down way too much to be useful.

-- 
Jens Axboe
Re: [PATCH] scsi: bsg: copy uring_cmd payload to prevent double-fetch from shared SQE
Posted by Bart Van Assche 1 week, 5 days ago
On 5/27/26 3:59 AM, Rahul Chandelkar wrote:
> scsi_bsg_uring_cmd() and scsi_bsg_map_user_buffer() read bsg_uring_cmd
> fields directly from the shared mmap'd io_uring submission ring via
> io_uring_sqe128_cmd().  On the inline execution path, io_uring has not
> yet copied the SQE to kernel memory, so a concurrent userspace thread
> can modify fields between reads.
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
[PATCH v2] scsi: bsg: read io_uring command fields once
Posted by Rahul Chandelkar 1 week, 4 days ago
scsi_bsg_uring_cmd() reads struct bsg_uring_cmd fields directly from the
shared mmap'd io_uring SQE.  On the inline execution path, io_uring may
still point at userspace-visible SQE storage, so a concurrent userspace
thread can change fields between validation and use.

request_len is checked against the size of scmd->cmnd, then used again for
scmd->cmd_len and copy_from_user().  If userspace changes request_len after
the bounds check, the later copy can overflow the 32-byte scmd->cmnd
buffer.  Transfer fields are also read again by scsi_bsg_map_user_buffer(),
leaving direction, address and length open to the same race.

Use READ_ONCE() to load each bsg_uring_cmd field needed by
scsi_bsg_uring_cmd() into a local variable, then use those locals for both
validation and execution.  Pass the stable transfer direction, address and
length into scsi_bsg_map_user_buffer() so the helper no longer re-derives
them from the SQE.

This fixes the double-fetch without copying the whole io_uring command
payload.

Tested with KASAN on QEMU (virtio-scsi, 2 vCPUs).  Without this fix, a
two-thread race produces:

  BUG: KASAN: wild-memory-access in scsi_queue_rq+0x4a3/0x58a0
  Write of size 96 at addr dead000000001000 by task poc/67
  Call Trace:
   kasan_report+0xce/0x100
   __asan_memset+0x23/0x50
   scsi_queue_rq+0x4a3/0x58a0
   scsi_bsg_uring_cmd+0x942/0x1570
   io_uring_cmd+0x2f6/0x950
   io_issue_sqe+0xe5/0x22d0

Link: https://lore.kernel.org/all/20260527105931.3950913-1-rc@rexion.ai/T/#u
Fixes: 7b6d3255e7f8 ("scsi: bsg: add io_uring passthrough handler")
Cc: stable@vger.kernel.org
Signed-off-by: Rahul Chandelkar <rc@rexion.ai>
---
Changes in v2:
- Use READ_ONCE() for individual fields instead of memcpying the command
  payload.
- Pass stable transfer parameters to scsi_bsg_map_user_buffer() so it does
  not re-read the SQE.
- Do not carry the Reviewed-by tag from v1 because the implementation
  strategy changed.

 drivers/scsi/scsi_bsg.c | 54 ++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/drivers/scsi/scsi_bsg.c b/drivers/scsi/scsi_bsg.c
index e80dec53174e..ccbe3d98e4ff 100644
--- a/drivers/scsi/scsi_bsg.c
+++ b/drivers/scsi/scsi_bsg.c
@@ -76,12 +76,10 @@ static enum rq_end_io_ret scsi_bsg_uring_cmd_done(struct request *req,
 
 static int scsi_bsg_map_user_buffer(struct request *req,
 				    struct io_uring_cmd *ioucmd,
-				    unsigned int issue_flags, gfp_t gfp_mask)
+				    unsigned int issue_flags, gfp_t gfp_mask,
+				    bool is_write, u64 buf_addr,
+				    unsigned long buf_len)
 {
-	const struct bsg_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
-	bool is_write = cmd->dout_xfer_len > 0;
-	u64 buf_addr = is_write ? cmd->dout_xferp : cmd->din_xferp;
-	unsigned long buf_len = is_write ? cmd->dout_xfer_len : cmd->din_xfer_len;
 	struct iov_iter iter;
 	int ret;
 
@@ -104,26 +102,40 @@ static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *iouc
 			       unsigned int issue_flags, bool open_for_write)
 {
 	struct scsi_bsg_uring_cmd_pdu *pdu = scsi_bsg_uring_cmd_pdu(ioucmd);
-	const struct bsg_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
+	const struct bsg_uring_cmd *cmd =
+		io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
 	struct scsi_cmnd *scmd;
 	struct request *req;
 	blk_mq_req_flags_t blk_flags = 0;
 	gfp_t gfp_mask = GFP_KERNEL;
+	u64 request = READ_ONCE(cmd->request);
+	u32 request_len = READ_ONCE(cmd->request_len);
+	u32 protocol = READ_ONCE(cmd->protocol);
+	u32 subprotocol = READ_ONCE(cmd->subprotocol);
+	u32 max_response_len = READ_ONCE(cmd->max_response_len);
+	u64 response = READ_ONCE(cmd->response);
+	u64 dout_xferp = READ_ONCE(cmd->dout_xferp);
+	u32 dout_xfer_len = READ_ONCE(cmd->dout_xfer_len);
+	u32 dout_iovec_count = READ_ONCE(cmd->dout_iovec_count);
+	u64 din_xferp = READ_ONCE(cmd->din_xferp);
+	u32 din_xfer_len = READ_ONCE(cmd->din_xfer_len);
+	u32 din_iovec_count = READ_ONCE(cmd->din_iovec_count);
+	u32 timeout_ms = READ_ONCE(cmd->timeout_ms);
 	int ret;
 
-	if (cmd->protocol != BSG_PROTOCOL_SCSI ||
-	    cmd->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
+	if (protocol != BSG_PROTOCOL_SCSI ||
+	    subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
 		return -EINVAL;
 
-	if (!cmd->request || cmd->request_len == 0)
+	if (!request || request_len == 0)
 		return -EINVAL;
 
-	if (cmd->dout_xfer_len && cmd->din_xfer_len) {
+	if (dout_xfer_len && din_xfer_len) {
 		pr_warn_once("BIDI support in bsg has been removed.\n");
 		return -EOPNOTSUPP;
 	}
 
-	if (cmd->dout_iovec_count > 0 || cmd->din_iovec_count > 0)
+	if (dout_iovec_count > 0 || din_iovec_count > 0)
 		return -EOPNOTSUPP;
 
 	if (issue_flags & IO_URING_F_NONBLOCK) {
@@ -131,20 +143,20 @@ static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *iouc
 		gfp_mask = GFP_NOWAIT;
 	}
 
-	req = scsi_alloc_request(q, cmd->dout_xfer_len ?
+	req = scsi_alloc_request(q, dout_xfer_len ?
 				 REQ_OP_DRV_OUT : REQ_OP_DRV_IN, blk_flags);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
 	scmd = blk_mq_rq_to_pdu(req);
-	if (cmd->request_len > sizeof(scmd->cmnd)) {
+	if (request_len > sizeof(scmd->cmnd)) {
 		ret = -EINVAL;
 		goto out_free_req;
 	}
-	scmd->cmd_len = cmd->request_len;
+	scmd->cmd_len = request_len;
 	scmd->allowed = SG_DEFAULT_RETRIES;
 
-	if (copy_from_user(scmd->cmnd, uptr64(cmd->request), cmd->request_len)) {
+	if (copy_from_user(scmd->cmnd, uptr64(request), request_len)) {
 		ret = -EFAULT;
 		goto out_free_req;
 	}
@@ -154,12 +166,18 @@ static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *iouc
 		goto out_free_req;
 	}
 
-	pdu->response_addr = cmd->response;
-	scmd->sense_len = cmd->max_response_len ?
-		min(cmd->max_response_len, SCSI_SENSE_BUFFERSIZE) : SCSI_SENSE_BUFFERSIZE;
+	pdu->response_addr = response;
+	scmd->sense_len = max_response_len ?
+		min(max_response_len, SCSI_SENSE_BUFFERSIZE) : SCSI_SENSE_BUFFERSIZE;
 
-	if (cmd->dout_xfer_len || cmd->din_xfer_len) {
-		ret = scsi_bsg_map_user_buffer(req, ioucmd, issue_flags, gfp_mask);
+	if (dout_xfer_len || din_xfer_len) {
+		bool is_write = dout_xfer_len > 0;
+		u64 buf_addr = is_write ? dout_xferp : din_xferp;
+		unsigned long buf_len = is_write ? dout_xfer_len : din_xfer_len;
+
+		ret = scsi_bsg_map_user_buffer(req, ioucmd, issue_flags,
+					       gfp_mask, is_write, buf_addr,
+					       buf_len);
 		if (ret)
 			goto out_free_req;
 		pdu->bio = req->bio;
@@ -167,8 +185,8 @@ static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *iouc
 		pdu->bio = NULL;
 	}
 
-	req->timeout = cmd->timeout_ms ?
-		msecs_to_jiffies(cmd->timeout_ms) : BLK_DEFAULT_SG_TIMEOUT;
+	req->timeout = timeout_ms ?
+		msecs_to_jiffies(timeout_ms) : BLK_DEFAULT_SG_TIMEOUT;
 
 	req->end_io = scsi_bsg_uring_cmd_done;
 	req->end_io_data = ioucmd;
-- 
2.54.0
Re: [PATCH v2] scsi: bsg: read io_uring command fields once
Posted by Yang Xiuwei 1 week, 1 day ago
Hi Rahul,

Thanks for the report and for v2.

Reviewed-by: Yang Xiuwei <yangxiuwei@kylinos.cn>