drivers/scsi/scsi_bsg.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-)
scsi_bsg_uring_cmd() and scsi_bsg_map_user_buffer() read bsg_uring_cmd
fields directly from the shared mmap'd io_uring submission ring via
io_uring_sqe128_cmd(). On the inline execution path, io_uring has not
yet copied the SQE to kernel memory, so a concurrent userspace thread
can modify fields between reads.
cmd->request_len is read for the bounds check, for the cmd_len
assignment, and for the copy_from_user length. A racing thread can
change request_len between the bounds check (passes with <= 32) and
copy_from_user (uses the enlarged value), overflowing the 32-byte
scmd->cmnd[] buffer into subsequent struct scsi_cmnd fields.
scsi_bsg_map_user_buffer() independently re-derives its cmd pointer
from the same shared SQE, re-reading dout_xfer_len, din_xfer_len,
dout_xferp, and din_xferp, enabling direction confusion and buffer
length races.
Copy struct bsg_uring_cmd to a stack-local variable before use in both
functions. The pointer variable 'cmd' is redirected to the local copy
so the rest of each function is unchanged.
Tested with KASAN on QEMU (virtio-scsi, 2 vCPUs). Without this fix,
a two-thread race produces:
BUG: KASAN: wild-memory-access in scsi_queue_rq+0x4a3/0x58a0
Write of size 96 at addr dead000000001000 by task poc/67
Call Trace:
kasan_report+0xce/0x100
__asan_memset+0x23/0x50
scsi_queue_rq+0x4a3/0x58a0
scsi_bsg_uring_cmd+0x942/0x1570
io_uring_cmd+0x2f6/0x950
io_issue_sqe+0xe5/0x22d0
Fixes: 7b6d3255e7f8 ("scsi: bsg: add io_uring passthrough handler")
Cc: stable@vger.kernel.org
Signed-off-by: Rahul Chandelkar <rc@rexion.ai>
---
drivers/scsi/scsi_bsg.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/scsi_bsg.c b/drivers/scsi/scsi_bsg.c
index e80dec53174e..244740655eb0 100644
--- a/drivers/scsi/scsi_bsg.c
+++ b/drivers/scsi/scsi_bsg.c
@@ -78,13 +78,21 @@ static int scsi_bsg_map_user_buffer(struct request *req,
struct io_uring_cmd *ioucmd,
unsigned int issue_flags, gfp_t gfp_mask)
{
- const struct bsg_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
- bool is_write = cmd->dout_xfer_len > 0;
- u64 buf_addr = is_write ? cmd->dout_xferp : cmd->din_xferp;
- unsigned long buf_len = is_write ? cmd->dout_xfer_len : cmd->din_xfer_len;
+ struct bsg_uring_cmd local_cmd;
+ const struct bsg_uring_cmd *cmd;
+ bool is_write;
+ u64 buf_addr;
+ unsigned long buf_len;
struct iov_iter iter;
int ret;
+ memcpy(&local_cmd, io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd),
+ sizeof(local_cmd));
+ cmd = &local_cmd;
+ is_write = cmd->dout_xfer_len > 0;
+ buf_addr = is_write ? cmd->dout_xferp : cmd->din_xferp;
+ buf_len = is_write ? cmd->dout_xfer_len : cmd->din_xfer_len;
+
if (ioucmd->flags & IORING_URING_CMD_FIXED) {
ret = io_uring_cmd_import_fixed(buf_addr, buf_len,
is_write ? WRITE : READ,
@@ -104,13 +112,18 @@ static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *iouc
unsigned int issue_flags, bool open_for_write)
{
struct scsi_bsg_uring_cmd_pdu *pdu = scsi_bsg_uring_cmd_pdu(ioucmd);
- const struct bsg_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
+ struct bsg_uring_cmd local_cmd;
+ const struct bsg_uring_cmd *cmd;
struct scsi_cmnd *scmd;
struct request *req;
blk_mq_req_flags_t blk_flags = 0;
gfp_t gfp_mask = GFP_KERNEL;
int ret;
+ memcpy(&local_cmd, io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd),
+ sizeof(local_cmd));
+ cmd = &local_cmd;
+
if (cmd->protocol != BSG_PROTOCOL_SCSI ||
cmd->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
return -EINVAL;
--
2.54.0
On 5/27/26 4:59 AM, Rahul Chandelkar wrote: > scsi_bsg_uring_cmd() and scsi_bsg_map_user_buffer() read bsg_uring_cmd > fields directly from the shared mmap'd io_uring submission ring via > io_uring_sqe128_cmd(). On the inline execution path, io_uring has not > yet copied the SQE to kernel memory, so a concurrent userspace thread > can modify fields between reads. > > cmd->request_len is read for the bounds check, for the cmd_len > assignment, and for the copy_from_user length. A racing thread can > change request_len between the bounds check (passes with <= 32) and > copy_from_user (uses the enlarged value), overflowing the 32-byte > scmd->cmnd[] buffer into subsequent struct scsi_cmnd fields. > > scsi_bsg_map_user_buffer() independently re-derives its cmd pointer > from the same shared SQE, re-reading dout_xfer_len, din_xfer_len, > dout_xferp, and din_xferp, enabling direction confusion and buffer > length races. > > Copy struct bsg_uring_cmd to a stack-local variable before use in both > functions. The pointer variable 'cmd' is redirected to the local copy > so the rest of each function is unchanged. > > Tested with KASAN on QEMU (virtio-scsi, 2 vCPUs). Without this fix, > a two-thread race produces: > > BUG: KASAN: wild-memory-access in scsi_queue_rq+0x4a3/0x58a0 > Write of size 96 at addr dead000000001000 by task poc/67 > Call Trace: > kasan_report+0xce/0x100 > __asan_memset+0x23/0x50 > scsi_queue_rq+0x4a3/0x58a0 > scsi_bsg_uring_cmd+0x942/0x1570 > io_uring_cmd+0x2f6/0x950 > io_issue_sqe+0xe5/0x22d0 I don't think this is the right way to fix it, ->sqe should've been stable upfront if this ends up happening. Can you share your poc with me? Your trace has been trimmed down way too much to be useful. -- Jens Axboe
On 5/27/26 3:59 AM, Rahul Chandelkar wrote: > scsi_bsg_uring_cmd() and scsi_bsg_map_user_buffer() read bsg_uring_cmd > fields directly from the shared mmap'd io_uring submission ring via > io_uring_sqe128_cmd(). On the inline execution path, io_uring has not > yet copied the SQE to kernel memory, so a concurrent userspace thread > can modify fields between reads. Reviewed-by: Bart Van Assche <bvanassche@acm.org>
scsi_bsg_uring_cmd() reads struct bsg_uring_cmd fields directly from the
shared mmap'd io_uring SQE. On the inline execution path, io_uring may
still point at userspace-visible SQE storage, so a concurrent userspace
thread can change fields between validation and use.
request_len is checked against the size of scmd->cmnd, then used again for
scmd->cmd_len and copy_from_user(). If userspace changes request_len after
the bounds check, the later copy can overflow the 32-byte scmd->cmnd
buffer. Transfer fields are also read again by scsi_bsg_map_user_buffer(),
leaving direction, address and length open to the same race.
Use READ_ONCE() to load each bsg_uring_cmd field needed by
scsi_bsg_uring_cmd() into a local variable, then use those locals for both
validation and execution. Pass the stable transfer direction, address and
length into scsi_bsg_map_user_buffer() so the helper no longer re-derives
them from the SQE.
This fixes the double-fetch without copying the whole io_uring command
payload.
Tested with KASAN on QEMU (virtio-scsi, 2 vCPUs). Without this fix, a
two-thread race produces:
BUG: KASAN: wild-memory-access in scsi_queue_rq+0x4a3/0x58a0
Write of size 96 at addr dead000000001000 by task poc/67
Call Trace:
kasan_report+0xce/0x100
__asan_memset+0x23/0x50
scsi_queue_rq+0x4a3/0x58a0
scsi_bsg_uring_cmd+0x942/0x1570
io_uring_cmd+0x2f6/0x950
io_issue_sqe+0xe5/0x22d0
Link: https://lore.kernel.org/all/20260527105931.3950913-1-rc@rexion.ai/T/#u
Fixes: 7b6d3255e7f8 ("scsi: bsg: add io_uring passthrough handler")
Cc: stable@vger.kernel.org
Signed-off-by: Rahul Chandelkar <rc@rexion.ai>
---
Changes in v2:
- Use READ_ONCE() for individual fields instead of memcpying the command
payload.
- Pass stable transfer parameters to scsi_bsg_map_user_buffer() so it does
not re-read the SQE.
- Do not carry the Reviewed-by tag from v1 because the implementation
strategy changed.
drivers/scsi/scsi_bsg.c | 54 ++++++++++++++++++++++++++---------------
1 file changed, 35 insertions(+), 19 deletions(-)
diff --git a/drivers/scsi/scsi_bsg.c b/drivers/scsi/scsi_bsg.c
index e80dec53174e..ccbe3d98e4ff 100644
--- a/drivers/scsi/scsi_bsg.c
+++ b/drivers/scsi/scsi_bsg.c
@@ -76,12 +76,10 @@ static enum rq_end_io_ret scsi_bsg_uring_cmd_done(struct request *req,
static int scsi_bsg_map_user_buffer(struct request *req,
struct io_uring_cmd *ioucmd,
- unsigned int issue_flags, gfp_t gfp_mask)
+ unsigned int issue_flags, gfp_t gfp_mask,
+ bool is_write, u64 buf_addr,
+ unsigned long buf_len)
{
- const struct bsg_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
- bool is_write = cmd->dout_xfer_len > 0;
- u64 buf_addr = is_write ? cmd->dout_xferp : cmd->din_xferp;
- unsigned long buf_len = is_write ? cmd->dout_xfer_len : cmd->din_xfer_len;
struct iov_iter iter;
int ret;
@@ -104,26 +102,40 @@ static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *iouc
unsigned int issue_flags, bool open_for_write)
{
struct scsi_bsg_uring_cmd_pdu *pdu = scsi_bsg_uring_cmd_pdu(ioucmd);
- const struct bsg_uring_cmd *cmd = io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
+ const struct bsg_uring_cmd *cmd =
+ io_uring_sqe128_cmd(ioucmd->sqe, struct bsg_uring_cmd);
struct scsi_cmnd *scmd;
struct request *req;
blk_mq_req_flags_t blk_flags = 0;
gfp_t gfp_mask = GFP_KERNEL;
+ u64 request = READ_ONCE(cmd->request);
+ u32 request_len = READ_ONCE(cmd->request_len);
+ u32 protocol = READ_ONCE(cmd->protocol);
+ u32 subprotocol = READ_ONCE(cmd->subprotocol);
+ u32 max_response_len = READ_ONCE(cmd->max_response_len);
+ u64 response = READ_ONCE(cmd->response);
+ u64 dout_xferp = READ_ONCE(cmd->dout_xferp);
+ u32 dout_xfer_len = READ_ONCE(cmd->dout_xfer_len);
+ u32 dout_iovec_count = READ_ONCE(cmd->dout_iovec_count);
+ u64 din_xferp = READ_ONCE(cmd->din_xferp);
+ u32 din_xfer_len = READ_ONCE(cmd->din_xfer_len);
+ u32 din_iovec_count = READ_ONCE(cmd->din_iovec_count);
+ u32 timeout_ms = READ_ONCE(cmd->timeout_ms);
int ret;
- if (cmd->protocol != BSG_PROTOCOL_SCSI ||
- cmd->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
+ if (protocol != BSG_PROTOCOL_SCSI ||
+ subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
return -EINVAL;
- if (!cmd->request || cmd->request_len == 0)
+ if (!request || request_len == 0)
return -EINVAL;
- if (cmd->dout_xfer_len && cmd->din_xfer_len) {
+ if (dout_xfer_len && din_xfer_len) {
pr_warn_once("BIDI support in bsg has been removed.\n");
return -EOPNOTSUPP;
}
- if (cmd->dout_iovec_count > 0 || cmd->din_iovec_count > 0)
+ if (dout_iovec_count > 0 || din_iovec_count > 0)
return -EOPNOTSUPP;
if (issue_flags & IO_URING_F_NONBLOCK) {
@@ -131,20 +143,20 @@ static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *iouc
gfp_mask = GFP_NOWAIT;
}
- req = scsi_alloc_request(q, cmd->dout_xfer_len ?
+ req = scsi_alloc_request(q, dout_xfer_len ?
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, blk_flags);
if (IS_ERR(req))
return PTR_ERR(req);
scmd = blk_mq_rq_to_pdu(req);
- if (cmd->request_len > sizeof(scmd->cmnd)) {
+ if (request_len > sizeof(scmd->cmnd)) {
ret = -EINVAL;
goto out_free_req;
}
- scmd->cmd_len = cmd->request_len;
+ scmd->cmd_len = request_len;
scmd->allowed = SG_DEFAULT_RETRIES;
- if (copy_from_user(scmd->cmnd, uptr64(cmd->request), cmd->request_len)) {
+ if (copy_from_user(scmd->cmnd, uptr64(request), request_len)) {
ret = -EFAULT;
goto out_free_req;
}
@@ -154,12 +166,18 @@ static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *iouc
goto out_free_req;
}
- pdu->response_addr = cmd->response;
- scmd->sense_len = cmd->max_response_len ?
- min(cmd->max_response_len, SCSI_SENSE_BUFFERSIZE) : SCSI_SENSE_BUFFERSIZE;
+ pdu->response_addr = response;
+ scmd->sense_len = max_response_len ?
+ min(max_response_len, SCSI_SENSE_BUFFERSIZE) : SCSI_SENSE_BUFFERSIZE;
- if (cmd->dout_xfer_len || cmd->din_xfer_len) {
- ret = scsi_bsg_map_user_buffer(req, ioucmd, issue_flags, gfp_mask);
+ if (dout_xfer_len || din_xfer_len) {
+ bool is_write = dout_xfer_len > 0;
+ u64 buf_addr = is_write ? dout_xferp : din_xferp;
+ unsigned long buf_len = is_write ? dout_xfer_len : din_xfer_len;
+
+ ret = scsi_bsg_map_user_buffer(req, ioucmd, issue_flags,
+ gfp_mask, is_write, buf_addr,
+ buf_len);
if (ret)
goto out_free_req;
pdu->bio = req->bio;
@@ -167,8 +185,8 @@ static int scsi_bsg_uring_cmd(struct request_queue *q, struct io_uring_cmd *iouc
pdu->bio = NULL;
}
- req->timeout = cmd->timeout_ms ?
- msecs_to_jiffies(cmd->timeout_ms) : BLK_DEFAULT_SG_TIMEOUT;
+ req->timeout = timeout_ms ?
+ msecs_to_jiffies(timeout_ms) : BLK_DEFAULT_SG_TIMEOUT;
req->end_io = scsi_bsg_uring_cmd_done;
req->end_io_data = ioucmd;
--
2.54.0
© 2016 - 2026 Red Hat, Inc.