Introduce a scsi_device head structure - scsi_mpath_head - to manage
multipathing for a scsi_device. This is similar to nvme_ns_head structure.
There is no reference in scsi_mpath_head to any disk, as this would be
managed by the scsi_disk driver.
A list of scsi_mpath_head structures is maintained so that matching
multipathed scsi_devices can be looked up. Matching is done through the
scsi_device unique id.
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/scsi_multipath.c | 147 ++++++++++++++++++++++++++++++++++
drivers/scsi/scsi_sysfs.c | 3 +
include/scsi/scsi_multipath.h | 29 +++++++
3 files changed, 179 insertions(+)
diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index 04e0bad3d9204..49316269fad8e 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -16,6 +16,10 @@
bool scsi_multipath;
static bool scsi_multipath_always;
+static LIST_HEAD(scsi_mpath_heads_list);
+static DEFINE_MUTEX(scsi_mpath_heads_lock);
+static DEFINE_IDA(scsi_multipath_dev_ida);
+
static int multipath_param_set(const char *val, const struct kernel_param *kp)
{
int ret;
@@ -99,6 +103,73 @@ static int scsi_multipath_sdev_init(struct scsi_device *sdev)
return 0;
}
+struct mpath_head_template smpdt_pr = {
+};
+
+static struct scsi_mpath_head *scsi_mpath_alloc_head(void)
+{
+ struct scsi_mpath_head *scsi_mpath_head;
+ int ret;
+
+ scsi_mpath_head = kzalloc(sizeof(*scsi_mpath_head), GFP_KERNEL);
+ if (!scsi_mpath_head)
+ return NULL;
+
+ ida_init(&scsi_mpath_head->ida);
+ mutex_init(&scsi_mpath_head->lock);
+
+ scsi_mpath_head->mpath_head = mpath_alloc_head();
+ if (IS_ERR(scsi_mpath_head->mpath_head))
+ goto out_free;
+ scsi_mpath_head->mpath_head->mpdt = &smpdt_pr;
+ scsi_mpath_head->mpath_head->drvdata = scsi_mpath_head;
+
+ scsi_mpath_head->index = ida_alloc(&scsi_multipath_dev_ida, GFP_KERNEL);
+ if (scsi_mpath_head->index < 0)
+ goto out_put_head;
+
+ device_initialize(&scsi_mpath_head->dev);
+ ret = dev_set_name(&scsi_mpath_head->dev, "%d", scsi_mpath_head->index);
+ if (ret) {
+ put_device(&scsi_mpath_head->dev);
+ goto out_free_ida;
+ }
+
+ return scsi_mpath_head;
+
+out_free_ida:
+ ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index);
+out_put_head:
+ mpath_put_head(scsi_mpath_head->mpath_head);
+out_free:
+ kfree(scsi_mpath_head);
+ return NULL;
+}
+
+static struct scsi_mpath_head *scsi_mpath_find_head(
+ struct scsi_mpath_device *scsi_mpath_dev)
+{
+ struct scsi_mpath_head *scsi_mpath_head;
+ int ret;
+
+ mutex_lock(&scsi_mpath_heads_lock);
+ list_for_each_entry(scsi_mpath_head, &scsi_mpath_heads_list, entry) {
+ ret = scsi_mpath_get_head(scsi_mpath_head);
+ if (ret)
+ continue;
+ if (strncmp(scsi_mpath_head->wwid,
+ scsi_mpath_dev->device_id_str,
+ SCSI_MPATH_DEVICE_ID_LEN) == 0) {
+
+ mutex_unlock(&scsi_mpath_heads_lock);
+ return scsi_mpath_head;
+ }
+ scsi_mpath_put_head(scsi_mpath_head);
+ }
+
+ return NULL;
+}
+
static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
{
kfree(sdev->scsi_mpath_dev);
@@ -107,6 +178,7 @@ static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
int scsi_mpath_dev_alloc(struct scsi_device *sdev)
{
+ struct scsi_mpath_head *scsi_mpath_head;
int ret;
if (!scsi_multipath)
@@ -127,13 +199,75 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev)
goto out_uninit;
}
+ scsi_mpath_head = scsi_mpath_find_head(sdev->scsi_mpath_dev);
+ if (scsi_mpath_head)
+ goto found;
+ /* scsi_mpath_heads_lock still held */
+ scsi_mpath_head = scsi_mpath_alloc_head();
+ if (!scsi_mpath_head)
+ goto out_uninit;
+
+ strcpy(scsi_mpath_head->wwid, sdev->scsi_mpath_dev->device_id_str);
+
+ ret = device_add(&scsi_mpath_head->dev);
+ if (ret)
+ goto out_put_head;
+
+ list_add_tail(&scsi_mpath_head->entry, &scsi_mpath_heads_list);
+
+ mutex_unlock(&scsi_mpath_heads_lock);
+ sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
+
+found:
+ sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida, GFP_KERNEL);
+ if (sdev->scsi_mpath_dev->index < 0) {
+ ret = sdev->scsi_mpath_dev->index;
+ goto out_put_head;
+ }
+
+ mutex_lock(&scsi_mpath_head->lock);
+ scsi_mpath_head->dev_count++;
+ mutex_unlock(&scsi_mpath_head->lock);
+
+ sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
return 0;
+out_put_head:
+ scsi_mpath_put_head(scsi_mpath_head);
out_uninit:
+ mutex_unlock(&scsi_mpath_heads_lock);
scsi_multipath_sdev_uninit(sdev);
return ret;
}
+static void scsi_mpath_remove_head(struct scsi_mpath_device *scsi_mpath_dev)
+{
+ struct scsi_mpath_head *scsi_mpath_head =
+ scsi_mpath_dev->scsi_mpath_head;
+ bool last_path = false;
+
+ mutex_lock(&scsi_mpath_head->lock);
+ scsi_mpath_head->dev_count--;
+ if (scsi_mpath_head->dev_count == 0)
+ last_path = true;
+ mutex_unlock(&scsi_mpath_head->lock);
+
+ if (last_path)
+ device_del(&scsi_mpath_head->dev);
+
+ scsi_mpath_dev->scsi_mpath_head = NULL;
+ scsi_mpath_put_head(scsi_mpath_head);
+}
+
+void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev)
+{
+ struct scsi_mpath_head *scsi_mpath_head = scsi_mpath_dev->scsi_mpath_head;
+
+ ida_free(&scsi_mpath_head->ida, scsi_mpath_dev->index);
+
+ scsi_mpath_remove_head(scsi_mpath_dev);
+}
+
void scsi_mpath_dev_release(struct scsi_device *sdev)
{
struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
@@ -142,8 +276,21 @@ void scsi_mpath_dev_release(struct scsi_device *sdev)
return;
scsi_multipath_sdev_uninit(sdev);
+}
+
+int scsi_mpath_get_head(struct scsi_mpath_head *scsi_mpath_head)
+{
+ if (!get_device(&scsi_mpath_head->dev))
+ return -ENXIO;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(scsi_mpath_get_head);
+void scsi_mpath_put_head(struct scsi_mpath_head *scsi_mpath_head)
+{
+ put_device(&scsi_mpath_head->dev);
}
+EXPORT_SYMBOL_GPL(scsi_mpath_put_head);
int __init scsi_multipath_init(void)
{
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 0d69e27600a7a..287a683e89ae5 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1447,6 +1447,9 @@ void __scsi_remove_device(struct scsi_device *sdev)
} else
put_device(&sdev->sdev_dev);
+ if (sdev->scsi_mpath_dev)
+ scsi_mpath_remove_device(sdev->scsi_mpath_dev);
+
/*
* Stop accepting new requests and wait until all queuecommand() and
* scsi_run_queue() invocations have finished before tearing down the
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
index ca00ea10cd5db..38953b05a44dc 100644
--- a/include/scsi/scsi_multipath.h
+++ b/include/scsi/scsi_multipath.h
@@ -19,9 +19,22 @@
#ifdef CONFIG_SCSI_MULTIPATH
#define SCSI_MPATH_DEVICE_ID_LEN 40
+struct scsi_mpath_head {
+ char wwid[SCSI_MPATH_DEVICE_ID_LEN];
+ struct list_head entry;
+ int dev_count;
+ struct ida ida;
+ struct mutex lock;
+ struct mpath_head *mpath_head;
+ struct device dev;
+ int index;
+};
+
struct scsi_mpath_device {
struct mpath_device mpath_device;
struct scsi_device *sdev;
+ int index;
+ struct scsi_mpath_head *scsi_mpath_head;
char device_id_str[SCSI_MPATH_DEVICE_ID_LEN];
};
@@ -32,8 +45,13 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev);
void scsi_mpath_dev_release(struct scsi_device *sdev);
int scsi_multipath_init(void);
void scsi_multipath_exit(void);
+void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev);
+int scsi_mpath_get_head(struct scsi_mpath_head *);
+void scsi_mpath_put_head(struct scsi_mpath_head *);
#else /* CONFIG_SCSI_MULTIPATH */
+struct scsi_mpath_head {
+};
struct scsi_mpath_device {
};
@@ -51,5 +69,16 @@ static inline int scsi_multipath_init(void)
static inline void scsi_multipath_exit(void)
{
}
+static inline void scsi_mpath_remove_device(struct scsi_mpath_device
+ *scsi_mpath_dev)
+{
+}
+static inline int scsi_mpath_get_head(struct scsi_mpath_head *)
+{
+ return 0;
+}
+static inline void scsi_mpath_put_head(struct scsi_mpath_head *)
+{
+}
#endif /* CONFIG_SCSI_MULTIPATH */
#endif /* _SCSI_SCSI_MULTIPATH_H */
--
2.43.5
On 2/25/26 16:36, John Garry wrote:
> Introduce a scsi_device head structure - scsi_mpath_head - to manage
> multipathing for a scsi_device. This is similar to nvme_ns_head structure.
>
> There is no reference in scsi_mpath_head to any disk, as this would be
> mananged by the scsi_disk driver.
>
> A list of scsi_mpath_head structures is managed to lookup for matching
> multipathed scsi_device's. Matching is done through the scsi_device
> unique id.
>
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
> drivers/scsi/scsi_multipath.c | 147 ++++++++++++++++++++++++++++++++++
> drivers/scsi/scsi_sysfs.c | 3 +
> include/scsi/scsi_multipath.h | 29 +++++++
> 3 files changed, 179 insertions(+)
>
> diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
> index 04e0bad3d9204..49316269fad8e 100644
> --- a/drivers/scsi/scsi_multipath.c
> +++ b/drivers/scsi/scsi_multipath.c
> @@ -16,6 +16,10 @@
> bool scsi_multipath;
> static bool scsi_multipath_always;
>
> +static LIST_HEAD(scsi_mpath_heads_list);
> +static DEFINE_MUTEX(scsi_mpath_heads_lock);
> +static DEFINE_IDA(scsi_multipath_dev_ida);
> +
> static int multipath_param_set(const char *val, const struct kernel_param *kp)
> {
> int ret;
> @@ -99,6 +103,73 @@ static int scsi_multipath_sdev_init(struct scsi_device *sdev)
> return 0;
> }
>
> +struct mpath_head_template smpdt_pr = {
> +};
> +
> +static struct scsi_mpath_head *scsi_mpath_alloc_head(void)
> +{
> + struct scsi_mpath_head *scsi_mpath_head;
> + int ret;
> +
> + scsi_mpath_head = kzalloc(sizeof(*scsi_mpath_head), GFP_KERNEL);
> + if (!scsi_mpath_head)
> + return NULL;
> +
> + ida_init(&scsi_mpath_head->ida);
> + mutex_init(&scsi_mpath_head->lock);
> +
> + scsi_mpath_head->mpath_head = mpath_alloc_head();
> + if (IS_ERR(scsi_mpath_head->mpath_head))
> + goto out_free;
> + scsi_mpath_head->mpath_head->mpdt = &smpdt_pr;
mpdt?
What's that supposed to mean?
This seems to be a persistent reservation thing, so why don't
you introduce it together with PR support?
> + scsi_mpath_head->mpath_head->drvdata = scsi_mpath_head;
> +
> + scsi_mpath_head->index = ida_alloc(&scsi_multipath_dev_ida, GFP_KERNEL);
> + if (scsi_mpath_head->index < 0)
> + goto out_put_head;
> +
> + device_initialize(&scsi_mpath_head->dev);
> + ret = dev_set_name(&scsi_mpath_head->dev, "%d", scsi_mpath_head->index);
Huh? The name is just the number? So we will have a device
/sys/devices/virtual/1 ?
The sysfs registration looks decidedly odd.
I guess we should add a scsi multipath class to sort the devices under.
> + if (ret) {
> + put_device(&scsi_mpath_head->dev);
> + goto out_free_ida;
> + }
> +
> + return scsi_mpath_head;
> +
> +out_free_ida:
> + ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index);
> +out_put_head:
> + mpath_put_head(scsi_mpath_head->mpath_head);
> +out_free:
> + kfree(scsi_mpath_head);
> + return NULL;
> +}
> +
> +static struct scsi_mpath_head *scsi_mpath_find_head(
> + struct scsi_mpath_device *scsi_mpath_dev)
> +{
> + struct scsi_mpath_head *scsi_mpath_head;
> + int ret;
> +
> + mutex_lock(&scsi_mpath_heads_lock);
> + list_for_each_entry(scsi_mpath_head, &scsi_mpath_heads_list, entry) {
> + ret = scsi_mpath_get_head(scsi_mpath_head);
> + if (ret)
> + continue;
> + if (strncmp(scsi_mpath_head->wwid,
> + scsi_mpath_dev->device_id_str,
> + SCSI_MPATH_DEVICE_ID_LEN) == 0) {
> +
> + mutex_unlock(&scsi_mpath_heads_lock);
> + return scsi_mpath_head;
> + }
> + scsi_mpath_put_head(scsi_mpath_head);
> + }
> +
> + return NULL;
> +}
> +
> static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
> {
> kfree(sdev->scsi_mpath_dev);
> @@ -107,6 +178,7 @@ static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
>
> int scsi_mpath_dev_alloc(struct scsi_device *sdev)
> {
> + struct scsi_mpath_head *scsi_mpath_head;
> int ret;
>
> if (!scsi_multipath)
> @@ -127,13 +199,75 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev)
> goto out_uninit;
> }
>
> + scsi_mpath_head = scsi_mpath_find_head(sdev->scsi_mpath_dev);
> + if (scsi_mpath_head)
> + goto found;
> + /* scsi_mpath_disks_list lock held */
> + scsi_mpath_head = scsi_mpath_alloc_head();
> + if (!scsi_mpath_head)
> + goto out_uninit;
> +
> + strcpy(scsi_mpath_head->wwid, sdev->scsi_mpath_dev->device_id_str);
> +
Do we have a sysfs attribute for this?
> + ret = device_add(&scsi_mpath_head->dev);
> + if (ret)
> + goto out_put_head;
> +
> + list_add_tail(&scsi_mpath_head->entry, &scsi_mpath_heads_list);
> +
> + mutex_unlock(&scsi_mpath_heads_lock);
> + sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
> +
> +found:
> + sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida, GFP_KERNEL);
> + if (sdev->scsi_mpath_dev->index < 0) {
> + ret = sdev->scsi_mpath_dev->index;
> + goto out_put_head;
> + }
> +
> + mutex_lock(&scsi_mpath_head->lock);
> + scsi_mpath_head->dev_count++;
> + mutex_unlock(&scsi_mpath_head->lock);
> +
> + sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
> return 0;
>
> +out_put_head:
> + scsi_mpath_put_head(scsi_mpath_head);
> out_uninit:
> + mutex_unlock(&scsi_mpath_heads_lock);
> scsi_multipath_sdev_uninit(sdev);
> return ret;
> }
>
> +static void scsi_mpath_remove_head(struct scsi_mpath_device *scsi_mpath_dev)
> +{
> + struct scsi_mpath_head *scsi_mpath_head =
> + scsi_mpath_dev->scsi_mpath_head;
> + bool last_path = false;
> +
> + mutex_lock(&scsi_mpath_head->lock);
> + scsi_mpath_head->dev_count--;
> + if (scsi_mpath_head->dev_count == 0)
> + last_path = true;
> + mutex_unlock(&scsi_mpath_head->lock);
> +
> + if (last_path)
> + device_del(&scsi_mpath_head->dev);
> +
> + scsi_mpath_dev->scsi_mpath_head = NULL;
> + scsi_mpath_put_head(scsi_mpath_head);
> +}
> +
> +void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev)
> +{
> + struct scsi_mpath_head *scsi_mpath_head = scsi_mpath_dev->scsi_mpath_head;
> +
> + ida_free(&scsi_mpath_head->ida, scsi_mpath_dev->index);
> +
> + scsi_mpath_remove_head(scsi_mpath_dev);
> +}
> +
> void scsi_mpath_dev_release(struct scsi_device *sdev)
> {
> struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
> @@ -142,8 +276,21 @@ void scsi_mpath_dev_release(struct scsi_device *sdev)
> return;
>
> scsi_multipath_sdev_uninit(sdev);
> +}
> +
> +int scsi_mpath_get_head(struct scsi_mpath_head *scsi_mpath_head)
> +{
> + if (!get_device(&scsi_mpath_head->dev))
> + return -ENXIO;
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(scsi_mpath_get_head);
>
> +void scsi_mpath_put_head(struct scsi_mpath_head *scsi_mpath_head)
> +{
> + put_device(&scsi_mpath_head->dev);
> }
> +EXPORT_SYMBOL_GPL(scsi_mpath_put_head);
>
> int __init scsi_multipath_init(void)
> {
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index 0d69e27600a7a..287a683e89ae5 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -1447,6 +1447,9 @@ void __scsi_remove_device(struct scsi_device *sdev)
> } else
> put_device(&sdev->sdev_dev);
>
> + if (sdev->scsi_mpath_dev)
> + scsi_mpath_remove_device(sdev->scsi_mpath_dev);
> +
> /*
> * Stop accepting new requests and wait until all queuecommand() and
> * scsi_run_queue() invocations have finished before tearing down the
> diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
> index ca00ea10cd5db..38953b05a44dc 100644
> --- a/include/scsi/scsi_multipath.h
> +++ b/include/scsi/scsi_multipath.h
> @@ -19,9 +19,22 @@
> #ifdef CONFIG_SCSI_MULTIPATH
> #define SCSI_MPATH_DEVICE_ID_LEN 40
>
> +struct scsi_mpath_head {
> + char wwid[SCSI_MPATH_DEVICE_ID_LEN];
Don't name it WWID. That's an ATA thing. Make it vpd_id.
> + struct list_head entry;
> + int dev_count;
> + struct ida ida;
> + struct mutex lock;
> + struct mpath_head *mpath_head;
> + struct device dev;
> + int index;
> +};
> +
> struct scsi_mpath_device {
> struct mpath_device mpath_device;
> struct scsi_device *sdev;
> + int index;
> + struct scsi_mpath_head *scsi_mpath_head;
>
> char device_id_str[SCSI_MPATH_DEVICE_ID_LEN];
> };
> @@ -32,8 +45,13 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev);
> void scsi_mpath_dev_release(struct scsi_device *sdev);
> int scsi_multipath_init(void);
> void scsi_multipath_exit(void);
> +void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev);
> +int scsi_mpath_get_head(struct scsi_mpath_head *);
> +void scsi_mpath_put_head(struct scsi_mpath_head *);
> #else /* CONFIG_SCSI_MULTIPATH */
>
> +struct scsi_mpath_head {
> +};
> struct scsi_mpath_device {
> };
>
> @@ -51,5 +69,16 @@ static inline int scsi_multipath_init(void)
> static inline void scsi_multipath_exit(void)
> {
> }
> +static inline void scsi_mpath_remove_device(struct scsi_mpath_device
> + *scsi_mpath_dev)
> +{
> +}
> +static inline int scsi_mpath_get_head(struct scsi_mpath_head *)
> +{
> + return 0;
> +}
> +static inline void scsi_mpath_put_head(struct scsi_mpath_head *)
> +{
> +}
> #endif /* CONFIG_SCSI_MULTIPATH */
> #endif /* _SCSI_SCSI_MULTIPATH_H */
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.com +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
>> + if (!scsi_mpath_head)
>> + return NULL;
>> +
>> + ida_init(&scsi_mpath_head->ida);
>> + mutex_init(&scsi_mpath_head->lock);
>> +
>> + scsi_mpath_head->mpath_head = mpath_alloc_head();
>> + if (IS_ERR(scsi_mpath_head->mpath_head))
>> + goto out_free;
>> + scsi_mpath_head->mpath_head->mpdt = &smpdt_pr;
>
> mpdt?
> What's that supposed to mean?
mpdt stands for multipath device template, similar to the scsi host template.
Please check it in the libmultipath series.
> Seems to be like a persistent reservation thing, so why don't
> you introduce it together with PR suppoer?
>
>> + scsi_mpath_head->mpath_head->drvdata = scsi_mpath_head;
>> +
>> + scsi_mpath_head->index = ida_alloc(&scsi_multipath_dev_ida,
>> GFP_KERNEL);
>> + if (scsi_mpath_head->index < 0)
>> + goto out_put_head;
>> +
>> + device_initialize(&scsi_mpath_head->dev);
>> + ret = dev_set_name(&scsi_mpath_head->dev, "%d", scsi_mpath_head-
>> >index);
>
> Huh? The name is just the number? So we will have a device
> /sys/devices/virtual/1 ?
>
> The sysfs registration looks decidedly odd.
> I guess we should add a scsi multipath class to sort the devices under.
We do, check the later patches.
I have to admit that I am not a huge fan of the naming, but it's hard to
find something very good.
There is no common host. Furthermore, if I wanted to use HCIL SCSI
naming, then the CIL may not be consistent or unique. I did consider
using the wwid/vpd id, but that seems so inconsistent.
>
>> + if (ret) {
>> + put_device(&scsi_mpath_head->dev);
>> + goto out_free_ida;
>> + }
>> +
>> + return scsi_mpath_head;
>> +
>> +out_free_ida:
>> + ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index);
>> +out_put_head:
>> + mpath_put_head(scsi_mpath_head->mpath_head);
>> +out_free:
>> + kfree(scsi_mpath_head);
>> + return NULL;
>> +}
>> +
>> +static struct scsi_mpath_head *scsi_mpath_find_head(
>> + struct scsi_mpath_device *scsi_mpath_dev)
>> +{
>> + struct scsi_mpath_head *scsi_mpath_head;
>> + int ret;
>> +
>> + mutex_lock(&scsi_mpath_heads_lock);
>> + list_for_each_entry(scsi_mpath_head, &scsi_mpath_heads_list,
>> entry) {
>> + ret = scsi_mpath_get_head(scsi_mpath_head);
>> + if (ret)
>> + continue;
>> + if (strncmp(scsi_mpath_head->wwid,
>> + scsi_mpath_dev->device_id_str,
>> + SCSI_MPATH_DEVICE_ID_LEN) == 0) {
>> +
>> + mutex_unlock(&scsi_mpath_heads_lock);
>> + return scsi_mpath_head;
>> + }
>> + scsi_mpath_put_head(scsi_mpath_head);
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
>> {
>> kfree(sdev->scsi_mpath_dev);
>> @@ -107,6 +178,7 @@ static void scsi_multipath_sdev_uninit(struct
>> scsi_device *sdev)
>> int scsi_mpath_dev_alloc(struct scsi_device *sdev)
>> {
>> + struct scsi_mpath_head *scsi_mpath_head;
>> int ret;
>> if (!scsi_multipath)
>> @@ -127,13 +199,75 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev)
>> goto out_uninit;
>> }
>> + scsi_mpath_head = scsi_mpath_find_head(sdev->scsi_mpath_dev);
>> + if (scsi_mpath_head)
>> + goto found;
>> + /* scsi_mpath_disks_list lock held */
>> + scsi_mpath_head = scsi_mpath_alloc_head();
>> + if (!scsi_mpath_head)
>> + goto out_uninit;
>> +
>> + strcpy(scsi_mpath_head->wwid, sdev->scsi_mpath_dev->device_id_str);
>> +
>
> Do we have a sysfs attribute for this?
yes, it's introduced later
>
>> + ret = device_add(&scsi_mpath_head->dev);
>> + if (ret)
>> + goto out_put_head;
>> +
>> + list_add_tail(&scsi_mpath_head->entry, &scsi_mpath_heads_list);
>> +
>> + mutex_unlock(&scsi_mpath_heads_lock);
>> + sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
>> +
>> +found:
>> + sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida,
>> GFP_KERNEL);
>> + if (sdev->scsi_mpath_dev->index < 0) {
>> + ret = sdev->scsi_mpath_dev->index;
>> + goto out_put_head;
>> + }
>> +
>> + mutex_lock(&scsi_mpath_head->lock);
>> + scsi_mpath_head->dev_count++;
>> + mutex_unlock(&scsi_mpath_head->lock);
>> +
>> + sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
>> return 0;
>> +out_put_head:
>> + scsi_mpath_put_head(scsi_mpath_head);
>> out_uninit:
>> + mutex_unlock(&scsi_mpath_heads_lock);
>> scsi_multipath_sdev_uninit(sdev);
>> return ret;
>> }
>> +static void scsi_mpath_remove_head(struct scsi_mpath_device
>> *scsi_mpath_dev)
>> +{
>> + struct scsi_mpath_head *scsi_mpath_head =
>> + scsi_mpath_dev->scsi_mpath_head;
>> + bool last_path = false;
>> +
>> + mutex_lock(&scsi_mpath_head->lock);
>> + scsi_mpath_head->dev_count--;
>> + if (scsi_mpath_head->dev_count == 0)
>> + last_path = true;
>> + mutex_unlock(&scsi_mpath_head->lock);
>> +
>> + if (last_path)
>> + device_del(&scsi_mpath_head->dev);
>> +
>> + scsi_mpath_dev->scsi_mpath_head = NULL;
>> + scsi_mpath_put_head(scsi_mpath_head);
>> +}
>> +
>> +void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev)
>> +{
>> + struct scsi_mpath_head *scsi_mpath_head = scsi_mpath_dev-
>> >scsi_mpath_head;
>> +
>> + ida_free(&scsi_mpath_head->ida, scsi_mpath_dev->index);
>> +
>> + scsi_mpath_remove_head(scsi_mpath_dev);
>> +}
>> +
>> void scsi_mpath_dev_release(struct scsi_device *sdev)
>> {
>> struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
>> @@ -142,8 +276,21 @@ void scsi_mpath_dev_release(struct scsi_device
>> *sdev)
>> return;
>> scsi_multipath_sdev_uninit(sdev);
>> +}
>> +
>> +int scsi_mpath_get_head(struct scsi_mpath_head *scsi_mpath_head)
>> +{
>> + if (!get_device(&scsi_mpath_head->dev))
>> + return -ENXIO;
>> + return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(scsi_mpath_get_head);
>> +void scsi_mpath_put_head(struct scsi_mpath_head *scsi_mpath_head)
>> +{
>> + put_device(&scsi_mpath_head->dev);
>> }
>> +EXPORT_SYMBOL_GPL(scsi_mpath_put_head);
>> int __init scsi_multipath_init(void)
>> {
>> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
>> index 0d69e27600a7a..287a683e89ae5 100644
>> --- a/drivers/scsi/scsi_sysfs.c
>> +++ b/drivers/scsi/scsi_sysfs.c
>> @@ -1447,6 +1447,9 @@ void __scsi_remove_device(struct scsi_device *sdev)
>> } else
>> put_device(&sdev->sdev_dev);
>> + if (sdev->scsi_mpath_dev)
>> + scsi_mpath_remove_device(sdev->scsi_mpath_dev);
>> +
>> /*
>> * Stop accepting new requests and wait until all queuecommand()
>> and
>> * scsi_run_queue() invocations have finished before tearing
>> down the
>> diff --git a/include/scsi/scsi_multipath.h b/include/scsi/
>> scsi_multipath.h
>> index ca00ea10cd5db..38953b05a44dc 100644
>> --- a/include/scsi/scsi_multipath.h
>> +++ b/include/scsi/scsi_multipath.h
>> @@ -19,9 +19,22 @@
>> #ifdef CONFIG_SCSI_MULTIPATH
>> #define SCSI_MPATH_DEVICE_ID_LEN 40
>> +struct scsi_mpath_head {
>> + char wwid[SCSI_MPATH_DEVICE_ID_LEN];
>
> Don't name it WWID. That's an ATA thing. Make it vpd_id.
>
ok
Thanks!
On Wed, Feb 25, 2026 at 03:36:06PM +0000, John Garry wrote:
> Introduce a scsi_device head structure - scsi_mpath_head - to manage
> multipathing for a scsi_device. This is similar to nvme_ns_head structure.
>
> There is no reference in scsi_mpath_head to any disk, as this would be
> mananged by the scsi_disk driver.
>
> A list of scsi_mpath_head structures is managed to lookup for matching
> multipathed scsi_device's. Matching is done through the scsi_device
> unique id.
>
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
> drivers/scsi/scsi_multipath.c | 147 ++++++++++++++++++++++++++++++++++
> drivers/scsi/scsi_sysfs.c | 3 +
> include/scsi/scsi_multipath.h | 29 +++++++
> 3 files changed, 179 insertions(+)
>
> diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
> index 04e0bad3d9204..49316269fad8e 100644
> --- a/drivers/scsi/scsi_multipath.c
> +++ b/drivers/scsi/scsi_multipath.c
>
> @@ -107,6 +178,7 @@ static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
>
> int scsi_mpath_dev_alloc(struct scsi_device *sdev)
> {
> + struct scsi_mpath_head *scsi_mpath_head;
> int ret;
>
> if (!scsi_multipath)
> @@ -127,13 +199,75 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev)
> goto out_uninit;
> }
>
> + scsi_mpath_head = scsi_mpath_find_head(sdev->scsi_mpath_dev);
> + if (scsi_mpath_head)
> + goto found;
> + /* scsi_mpath_disks_list lock held */
Typo. It should be "scsi_mpath_heads_list lock still held". Also, why
split the locking between this function and scsi_mpath_find_head()? It
seems like it would be clearer if you did it all here.
> + scsi_mpath_head = scsi_mpath_alloc_head();
> + if (!scsi_mpath_head)
> + goto out_uninit;
It seems reasonable to fall back to treating the device as non-multipathed
if you can't set up the multipathing resources. But you should probably
warn if that happens.
> +
> + strcpy(scsi_mpath_head->wwid, sdev->scsi_mpath_dev->device_id_str);
> +
> + ret = device_add(&scsi_mpath_head->dev);
> + if (ret)
> + goto out_put_head;
> +
> + list_add_tail(&scsi_mpath_head->entry, &scsi_mpath_heads_list);
> +
> + mutex_unlock(&scsi_mpath_heads_lock);
> + sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
You already set sdev->scsi_mpath_dev->scsi_mpath_head right before you
return.
> +
> +found:
> + sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida, GFP_KERNEL);
> + if (sdev->scsi_mpath_dev->index < 0) {
> + ret = sdev->scsi_mpath_dev->index;
> + goto out_put_head;
&scsi_mpath_heads_lock is already unlocked here, but it will get
unlocked again in out_uninit
> + }
> +
> + mutex_lock(&scsi_mpath_head->lock);
> + scsi_mpath_head->dev_count++;
> + mutex_unlock(&scsi_mpath_head->lock);
> +
> + sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
> return 0;
>
> +out_put_head:
> + scsi_mpath_put_head(scsi_mpath_head);
> out_uninit:
> + mutex_unlock(&scsi_mpath_heads_lock);
> scsi_multipath_sdev_uninit(sdev);
> return ret;
> }
>
> +static void scsi_mpath_remove_head(struct scsi_mpath_device *scsi_mpath_dev)
> +{
> + struct scsi_mpath_head *scsi_mpath_head =
> + scsi_mpath_dev->scsi_mpath_head;
> + bool last_path = false;
> +
> + mutex_lock(&scsi_mpath_head->lock);
> + scsi_mpath_head->dev_count--;
> + if (scsi_mpath_head->dev_count == 0)
> + last_path = true;
> + mutex_unlock(&scsi_mpath_head->lock);
The locking of scsi_mpath_head->lock makes it appear that
scsi_mpath_remove_head() and scsi_mpath_dev_alloc() can both happen at
the same time. I didn't check enough to verify if that's actually the
case, but if it's not, then the lock is unnecessary. If they can run at
the same time, then I don't see anything keeping scsi_mpath_dev_alloc()
from calling scsi_mpath_find_head() and finding a scsi_mpath_head that
is just about to have its device deleted by
device_del(&scsi_mpath_head->dev). If this happens, the device won't
get re-added.
-Ben
> +
> + if (last_path)
> + device_del(&scsi_mpath_head->dev);
> +
> + scsi_mpath_dev->scsi_mpath_head = NULL;
> + scsi_mpath_put_head(scsi_mpath_head);
> +}
On 02/03/2026 02:50, Benjamin Marzinski wrote:
> On Wed, Feb 25, 2026 at 03:36:06PM +0000, John Garry wrote:
>> Introduce a scsi_device head structure - scsi_mpath_head - to manage
>> multipathing for a scsi_device. This is similar to nvme_ns_head structure.
>>
>> There is no reference in scsi_mpath_head to any disk, as this would be
>> mananged by the scsi_disk driver.
>>
>> A list of scsi_mpath_head structures is managed to lookup for matching
>> multipathed scsi_device's. Matching is done through the scsi_device
>> unique id.
>>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>> drivers/scsi/scsi_multipath.c | 147 ++++++++++++++++++++++++++++++++++
>> drivers/scsi/scsi_sysfs.c | 3 +
>> include/scsi/scsi_multipath.h | 29 +++++++
>> 3 files changed, 179 insertions(+)
>>
>> diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
>> index 04e0bad3d9204..49316269fad8e 100644
>> --- a/drivers/scsi/scsi_multipath.c
>> +++ b/drivers/scsi/scsi_multipath.c
>>
>> @@ -107,6 +178,7 @@ static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
>>
>> int scsi_mpath_dev_alloc(struct scsi_device *sdev)
>> {
>> + struct scsi_mpath_head *scsi_mpath_head;
>> int ret;
>>
>> if (!scsi_multipath)
>> @@ -127,13 +199,75 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev)
>> goto out_uninit;
>> }
>>
>> + scsi_mpath_head = scsi_mpath_find_head(sdev->scsi_mpath_dev);
>> + if (scsi_mpath_head)
>> + goto found;
>> + /* scsi_mpath_disks_list lock held */
>
> Typo. It should be "scsi_mpath_heads_list lock still held".
Yes
> Also, why
> split the locking between this function and scsi_mpath_find_head()? It
> seems like it would be clearer if you did in all here.
Maybe that is better - I'll consider it further.
>
>> + scsi_mpath_head = scsi_mpath_alloc_head();
>> + if (!scsi_mpath_head)
>> + goto out_uninit;
>
> It seems resonable to failback to treating the device as non-multipathed
> if you can't setup the multipathing resources. But you should probably
> warn if that happens.
OK, I can use pr_err or pr_warn if that happens
>
>> +
>> + strcpy(scsi_mpath_head->wwid, sdev->scsi_mpath_dev->device_id_str);
>> +
>> + ret = device_add(&scsi_mpath_head->dev);
>> + if (ret)
>> + goto out_put_head;
>> +
>> + list_add_tail(&scsi_mpath_head->entry, &scsi_mpath_heads_list);
>> +
>> + mutex_unlock(&scsi_mpath_heads_lock);
>> + sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
>
> You already set sdev->scsi_mpath_dev->scsi_mpath_head right before you
> return.
ok, I'll drop this duplicated code
>
>> +
>> +found:
>> + sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida, GFP_KERNEL);
>> + if (sdev->scsi_mpath_dev->index < 0) {
>> + ret = sdev->scsi_mpath_dev->index;
>> + goto out_put_head;
>
> &scsi_mpath_heads_lock is already unlocked here, but it will get
> unlocked again in out_uninit
Yes, I will fix the locking/unlocking
>
>> + }
>> +
>> + mutex_lock(&scsi_mpath_head->lock);
>> + scsi_mpath_head->dev_count++;
>> + mutex_unlock(&scsi_mpath_head->lock);
>> +
>> + sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
>> return 0;
>>
>> +out_put_head:
>> + scsi_mpath_put_head(scsi_mpath_head);
>> out_uninit:
>> + mutex_unlock(&scsi_mpath_heads_lock);
>> scsi_multipath_sdev_uninit(sdev);
>> return ret;
>> }
>>
>> +static void scsi_mpath_remove_head(struct scsi_mpath_device *scsi_mpath_dev)
>> +{
>> + struct scsi_mpath_head *scsi_mpath_head =
>> + scsi_mpath_dev->scsi_mpath_head;
>> + bool last_path = false;
>> +
>> + mutex_lock(&scsi_mpath_head->lock);
>> + scsi_mpath_head->dev_count--;
>> + if (scsi_mpath_head->dev_count == 0)
>> + last_path = true;
>> + mutex_unlock(&scsi_mpath_head->lock);
>
> The locking of scsi_mpath_head->lock makes it appear that
> scsi_mpath_remove_head() and scsi_mpath_dev_alloc() can both happen at
> the same time. I didn't check enough to verify if that's actually the
> case, but if it's not, then the lock is unnecessary.
It should be possible for different sdevs
> If they can run at
> the same time, then I don't see anything keeping scsi_mpath_dev_alloc()
> from calling scsi_mpath_find_head() and finding a scsi_mpath_head that
> is just about to have its device deleted by
> device_del(&scsi_mpath_head->dev). If this happens, the device won't
> get re-added.
Yeah, I think that there might be a problem here. Let me check it further.
Thanks!
© 2016 - 2026 Red Hat, Inc.