[PATCH v4] scsi: core: Fix async_scan race condition with READ_ONCE/WRITE_ONCE

Chaohai Chen posted 1 patch 1 month, 1 week ago
drivers/scsi/scsi_scan.c | 22 ++++++++--------------
include/scsi/scsi_host.h |  7 ++++---
2 files changed, 12 insertions(+), 17 deletions(-)
[PATCH v4] scsi: core: Fix async_scan race condition with READ_ONCE/WRITE_ONCE
Posted by Chaohai Chen 1 month, 1 week ago
Previously, host_lock was held around writes to the async_scan bitfield
so that they could not conflict with updates to neighbouring bits, but
several code paths still read async_scan without that lock (naked reads).

Convert async_scan from a bitfield to a bool type to eliminate bit-level
conflicts entirely. Use READ_ONCE() and WRITE_ONCE() to ensure proper
memory ordering on Alpha and satisfy KCSAN requirements.

Signed-off-by: Chaohai Chen <wdhh6@aliyun.com>
---

v1->v3:
- use READ_ONCE()/WRITE_ONCE() to fix the issue. (Christoph Hellwig, Damien Le Moal)

v3->v4:
- move async_scan before the bit fields in the structure to avoid holes. (Damien Le Moal)

 drivers/scsi/scsi_scan.c | 22 ++++++++--------------
 include/scsi/scsi_host.h |  7 ++++---
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 60c06fa4ec32..892be54dacc6 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1298,7 +1298,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
 		goto out_free_result;
 	}
 
-	res = scsi_add_lun(sdev, result, &bflags, shost->async_scan);
+	res = scsi_add_lun(sdev, result, &bflags, READ_ONCE(shost->async_scan));
 	if (res == SCSI_SCAN_LUN_PRESENT) {
 		if (bflags & BLIST_KEY) {
 			sdev->lockable = 0;
@@ -1629,7 +1629,7 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel,
 	scsi_autopm_get_target(starget);
 
 	mutex_lock(&shost->scan_mutex);
-	if (!shost->async_scan)
+	if (!READ_ONCE(shost->async_scan))
 		scsi_complete_async_scans();
 
 	if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) {
@@ -1839,7 +1839,7 @@ void scsi_scan_target(struct device *parent, unsigned int channel,
 		return;
 
 	mutex_lock(&shost->scan_mutex);
-	if (!shost->async_scan)
+	if (!READ_ONCE(shost->async_scan))
 		scsi_complete_async_scans();
 
 	if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) {
@@ -1896,7 +1896,7 @@ int scsi_scan_host_selected(struct Scsi_Host *shost, unsigned int channel,
 		return -EINVAL;
 
 	mutex_lock(&shost->scan_mutex);
-	if (!shost->async_scan)
+	if (!READ_ONCE(shost->async_scan))
 		scsi_complete_async_scans();
 
 	if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) {
@@ -1943,13 +1943,12 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost)
 static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
 {
 	struct async_scan_data *data = NULL;
-	unsigned long flags;
 
 	if (strncmp(scsi_scan_type, "sync", 4) == 0)
 		return NULL;
 
 	mutex_lock(&shost->scan_mutex);
-	if (shost->async_scan) {
+	if (READ_ONCE(shost->async_scan)) {
 		shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__);
 		goto err;
 	}
@@ -1962,9 +1961,7 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
 		goto err;
 	init_completion(&data->prev_finished);
 
-	spin_lock_irqsave(shost->host_lock, flags);
-	shost->async_scan = 1;
-	spin_unlock_irqrestore(shost->host_lock, flags);
+	WRITE_ONCE(shost->async_scan, true);
 	mutex_unlock(&shost->scan_mutex);
 
 	spin_lock(&async_scan_lock);
@@ -1992,7 +1989,6 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost)
 static void scsi_finish_async_scan(struct async_scan_data *data)
 {
 	struct Scsi_Host *shost;
-	unsigned long flags;
 
 	if (!data)
 		return;
@@ -2001,7 +1997,7 @@ static void scsi_finish_async_scan(struct async_scan_data *data)
 
 	mutex_lock(&shost->scan_mutex);
 
-	if (!shost->async_scan) {
+	if (!READ_ONCE(shost->async_scan)) {
 		shost_printk(KERN_INFO, shost, "%s called twice\n", __func__);
 		dump_stack();
 		mutex_unlock(&shost->scan_mutex);
@@ -2012,9 +2008,7 @@ static void scsi_finish_async_scan(struct async_scan_data *data)
 
 	scsi_sysfs_add_devices(shost);
 
-	spin_lock_irqsave(shost->host_lock, flags);
-	shost->async_scan = 0;
-	spin_unlock_irqrestore(shost->host_lock, flags);
+	WRITE_ONCE(shost->async_scan, false);
 
 	mutex_unlock(&shost->scan_mutex);
 
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index f6e12565a81d..ad745462f5ec 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -660,6 +660,10 @@ struct Scsi_Host {
 	 */
 	unsigned nr_hw_queues;
 	unsigned nr_maps;
+
+	/* Asynchronous scan in progress */
+	bool async_scan;
+
 	unsigned active_mode:2;
 
 	/*
@@ -678,9 +682,6 @@ struct Scsi_Host {
 	/* Task mgmt function in progress */
 	unsigned tmf_in_progress:1;
 
-	/* Asynchronous scan in progress */
-	unsigned async_scan:1;
-
 	/* Don't resume host in EH */
 	unsigned eh_noresume:1;
 
-- 
2.43.7
Re: [PATCH v4] scsi: core: Fix async_scan race condition with READ_ONCE/WRITE_ONCE
Posted by Bart Van Assche 1 month, 1 week ago
On 3/4/26 3:29 AM, Chaohai Chen wrote:
> -	if (!shost->async_scan) {
> +	if (!READ_ONCE(shost->async_scan)) {

Yikes. I'm not aware of any other kernel code that uses READ_ONCE() /
WRITE_ONCE() to access a member variable protected by a mutex. Please
annotate the async_scan member variable with __guarded_by() and drop the
READ_ONCE() and WRITE_ONCE() invocations introduced by this patch.

Bart.