Add support to attach a multipath disk.
We still allocate the gendisk per path, and this is required for the
per-path submission. However, those gendisks are marked as hidden. Those
disks are named sdX:Y, where X is the multipath disk index and Y is the
per-path index.
A global list of sd_mpath_disks is kept for matching scsi_device's.
The multipath gendisk has the name and disk->major/minor set to mimic a
scsi_disk.
The following is an example of relevant scsi_disk and block sysfs
directories:
$ ls -l /sys/block/ | grep sdc
lrwxrwxrwx 1 root root 0 Feb 24 16:01 sdc -> ../devices/virtual/scsi_mpath_disk/0/sdc
lrwxrwxrwx 1 root root 0 Feb 24 16:01 sdc:0 -> ../devices/platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0
lrwxrwxrwx 1 root root 0 Feb 24 16:02 sdc:1 -> ../devices/platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1
$ ls -l /sys/class/scsi_mpath_disk/0/
total 0
drwxr-xr-x 2 root root 0 Feb 24 16:03 power
drwxr-xr-x 11 root root 0 Feb 24 16:01 sdc
lrwxrwxrwx 1 root root 0 Feb 24 16:01 subsystem -> ../../../../class/scsi_mpath_disk
-rw-r--r-- 1 root root 4096 Feb 24 16:01 uevent
$ ls -l /sys/class/scsi_mpath_disk/0/sdc/multipath/
total 0
lrwxrwxrwx 1 root root 0 Feb 24 16:20 sdc:0 -> ../../../../../platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0
lrwxrwxrwx 1 root root 0 Feb 24 16:20 sdc:1 -> ../../../../../platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1
$ ls -l /dev/sdc*
brw-rw---- 1 root disk 8, 32 Feb 24 16:01 /dev/sdc
brw-rw---- 1 root disk 8, 33 Feb 24 16:01 /dev/sdc1
brw-rw---- 1 root disk 8, 34 Feb 24 16:01 /dev/sdc2
$ lsblk /dev/sdc
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
sdc 8:32 0 600M 0 disk
|-sdc1 8:33 0 9M 0 part
`-sdc2 8:34 0 568M 0 part
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
drivers/scsi/sd.c | 376 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 358 insertions(+), 18 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9617878b53ec6..409c0937764d9 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -117,12 +117,33 @@ static DEFINE_IDA(sd_index_ida);
static mempool_t *sd_page_pool;
static struct lock_class_key sd_bio_compl_lkclass;
#ifdef CONFIG_SCSI_MULTIPATH
+static LIST_HEAD(sd_mpath_disks_list); /* every sd_mpath_disk, searched by sd_mpath_find_disk() */
+static DEFINE_MUTEX(sd_mpath_disks_lock); /* held around sd_mpath_disks_list walks and updates */
+
struct sd_mpath_disk {
+ struct device dev; /* embedded device; sd_mpath_disk_release() frees the whole struct */
+ int disk_index; /* index allocated from sd_index_ida, used for the sdX name and minors */
+ int disk_count; /* attached per-path scsi_disks; incremented/decremented under @lock */
+ struct list_head entry; /* link in sd_mpath_disks_list */
+ struct mutex lock; /* taken around disk_count increments and decrements */
struct mpath_disk *mpath_disk;
+ struct scsi_mpath_head *scsi_mpath_head; /* reference dropped in sd_mpath_disk_release() */
};
static void sd_mpath_disk_release(struct device *dev)
{
+ struct sd_mpath_disk *sd_mpath_disk =
+ container_of(dev, struct sd_mpath_disk, dev);
+ struct scsi_mpath_head *scsi_mpath_head =
+ sd_mpath_disk->scsi_mpath_head;
+ struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk;
+
+ mpath_put_disk(mpath_disk);
+
+ ida_free(&sd_index_ida, sd_mpath_disk->disk_index);
+ scsi_mpath_put_head(scsi_mpath_head);
+
+ kfree(sd_mpath_disk);
}
static const struct class sd_mpath_disk_class = {
@@ -4144,7 +4165,302 @@ static const struct scsi_mpath_pr_ops sd_mpath_pr_ops = {
.pr_read_keys = sd_mpath_pr_read_keys,
.pr_read_reservation = sd_mpath_pr_read_reservation,
};
+
+/*
+ * Propagate the queue limits and capacity of the per-path disk @sdkp to the
+ * multipath head disk.
+ *
+ * The head queue is frozen while the limits are committed and the capacity
+ * is updated.
+ *
+ * Returns the result of queue_limits_commit_update() on the head queue.
+ */
+static int sd_mpath_revalidate_head(struct scsi_disk *sdkp)
+{
+	struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
+	struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk;
+	struct gendisk *disk = mpath_disk->disk;
+	struct queue_limits *sdkp_lim = &sdkp->disk->queue->limits;
+	struct queue_limits lim;
+	unsigned int memflags;
+	int ret;
+
+	lim = queue_limits_start_update(disk->queue);
+	memflags = blk_mq_freeze_queue(disk->queue);
+
+	/* Take the block sizes and I/O hints straight from the path. */
+	lim.logical_block_size = sdkp_lim->logical_block_size;
+	lim.physical_block_size = sdkp_lim->physical_block_size;
+	lim.io_min = sdkp_lim->io_min;
+	lim.io_opt = sdkp_lim->io_opt;
+
+	queue_limits_stack_bdev(&lim, sdkp->disk->part0, 0,
+				disk->disk_name);
+
+	/* TODO: setup integrity limits */
+	lim.max_write_streams = sdkp_lim->max_write_streams;
+	lim.write_stream_granularity = sdkp_lim->write_stream_granularity;
+	ret = queue_limits_commit_update(disk->queue, &lim);
+
+	/* The capacity is updated even when committing the limits failed. */
+	set_capacity_and_notify(disk, get_capacity(sdkp->disk));
+
+	blk_mq_unfreeze_queue(disk->queue, memflags);
+
+	return ret;
+}
+/*
+ * Take a reference on the device embedded in @sd_mpath_disk.
+ * Returns 0 on success or -ENXIO if get_device() returned NULL.
+ */
+static int sd_mpath_get_disk(struct sd_mpath_disk *sd_mpath_disk)
+{
+	return get_device(&sd_mpath_disk->dev) ? 0 : -ENXIO;
+}
+
+static void sd_mpath_put_disk(struct sd_mpath_disk *sd_mpath_disk)
+{
+ put_device(&sd_mpath_disk->dev); /* drop one reference on the embedded device */
+}
+
+/*
+ * Search sd_mpath_disks_list for the multipath disk whose head WWID matches
+ * the device ID string of @sdp.
+ *
+ * Locking contract (asymmetric on purpose):
+ *  - match found: sd_mpath_disks_lock has been released and the returned
+ *    disk carries an extra device reference owned by the caller;
+ *  - no match: NULL is returned with sd_mpath_disks_lock STILL HELD, and the
+ *    caller must release it.
+ */
+static struct sd_mpath_disk *sd_mpath_find_disk(struct scsi_device *sdp)
+{
+	struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
+	struct sd_mpath_disk *cur;
+
+	mutex_lock(&sd_mpath_disks_lock);
+	list_for_each_entry(cur, &sd_mpath_disks_list, entry) {
+		struct scsi_mpath_head *head;
+
+		if (sd_mpath_get_disk(cur))
+			continue;
+
+		head = cur->mpath_disk->mpath_head->drvdata;
+		if (strncmp(head->wwid, scsi_mpath_dev->device_id_str,
+			    SCSI_MPATH_DEVICE_ID_LEN) != 0) {
+			/* Not ours: give the reference back and keep looking. */
+			sd_mpath_put_disk(cur);
+			continue;
+		}
+
+		mutex_unlock(&sd_mpath_disks_lock);
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Register the per-path gendisk of @sdkp with its multipath head: record the
+ * gendisk in the path's mpath_device, add the path to the head and mark it
+ * live.
+ */
+static void sd_mpath_add_disk(struct scsi_disk *sdkp)
+{
+	struct mpath_disk *head_disk = sdkp->sd_mpath_disk->mpath_disk;
+	struct mpath_device *path;
+
+	path = &sdkp->device->scsi_mpath_dev->mpath_device;
+
+	path->disk = sdkp->disk;
+	mpath_add_device(head_disk->mpath_head, path);
+	mpath_device_set_live(head_disk, path);
+}
+
+static int sd_mpath_probe(struct scsi_disk *sdkp)
+{
+ struct scsi_device *sdp = sdkp->device;
+ struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
+ struct device *dma_dev = sdp->host->dma_dev;
+ struct scsi_mpath_head *scsi_mpath_head =
+ scsi_mpath_dev->scsi_mpath_head;
+ struct sd_mpath_disk *sd_mpath_disk;
+ struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
+ struct queue_limits lim;
+ struct gendisk *disk;
+ int error;
+
+ /*
+ * Attach the path @sdkp to a multipath disk, creating that disk if
+ * this is the first path. Returns 0 on success or a negative errno.
+ *
+ * If sd_mpath_find_disk() finds no matching disk it returns NULL with
+ * sd_mpath_disks_lock still held; the lock is then released below,
+ * either after the new disk has been added to sd_mpath_disks_list or
+ * on the error paths. If a disk is found, the lock has already been
+ * dropped and an extra device reference was taken for this path.
+ */
+ sd_mpath_disk = sd_mpath_find_disk(sdp);
+ if (sd_mpath_disk) {
+ mutex_lock(&sd_mpath_disk->lock);
+ sd_mpath_disk->disk_count++;
+ mutex_unlock(&sd_mpath_disk->lock);
+ goto found;
+ }
+
+ sd_mpath_disk = kzalloc(sizeof(*sd_mpath_disk), GFP_KERNEL);
+ if (!sd_mpath_disk) {
+ error = -ENOMEM;
+ goto out_unlock;
+ }
+
+ sd_mpath_disk->scsi_mpath_head = scsi_mpath_head;
+ device_initialize(&sd_mpath_disk->dev);
+ mutex_init(&sd_mpath_disk->lock);
+ sd_mpath_disk->dev.class = &sd_mpath_disk_class;
+
+ blk_set_stacking_limits(&lim);
+ lim.dma_alignment = 3;
+ lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT |
+ BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES;
+
+ sd_mpath_disk->mpath_disk = mpath_alloc_head_disk(&lim,
+ dev_to_node(dma_dev));
+ if (!sd_mpath_disk->mpath_disk) {
+ error = -ENOMEM;
+ goto out_free_disk;
+ }
+ disk = sd_mpath_disk->mpath_disk->disk;
+ mpath_get_head(mpath_head); /* undone in mpath_free_disk() */
+
+ sd_mpath_disk->mpath_disk->mpath_head = mpath_head;
+ sd_mpath_disk->mpath_disk->parent = &sd_mpath_disk->dev;
+
+ error = ida_alloc(&sd_index_ida, GFP_KERNEL);
+ if (error < 0) {
+ sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n");
+ goto out_put_disk;
+ }
+ sd_mpath_disk->disk_index = error;
+ error = sd_format_disk_name("sd", sd_mpath_disk->disk_index,
+ disk->disk_name, DISK_NAME_LEN);
+ if (error)
+ goto out_free_index;
+
+ error = dev_set_name(&sd_mpath_disk->dev, "%s",
+ dev_name(&scsi_mpath_head->dev));
+ if (error)
+ goto out_free_index;
+
+ /* undone in sd_mpath_disk_release() */
+ scsi_mpath_get_head(scsi_mpath_head);
+
+ error = device_add(&sd_mpath_disk->dev);
+ if (error) {
+ put_device(&sd_mpath_disk->dev); /* sd_mpath_disk_release() undoes the setup above, so only unlock */
+ goto out_unlock;
+ }
+
+ list_add_tail(&sd_mpath_disk->entry, &sd_mpath_disks_list);
+ disk->major = sd_major((sd_mpath_disk->disk_index & 0xf0) >> 4);
+ disk->first_minor = ((sd_mpath_disk->disk_index & 0xf) << 4) |
+ (sd_mpath_disk->disk_index & 0xfff00);
+ disk->minors = SD_MINORS;
+
+ sd_mpath_disk->disk_count = 1;
+ mutex_unlock(&sd_mpath_disks_lock);
+
+found:
+ sdkp->sd_mpath_disk = sd_mpath_disk;
+ sdkp->disk->flags |= GENHD_FL_HIDDEN; /* only the multipath head disk is exposed */
+ snprintf(sdkp->disk->disk_name, DISK_NAME_LEN, "%s:%d",
+ sd_mpath_disk->mpath_disk->disk->disk_name,
+ scsi_mpath_dev->index);
+
+ sdkp->index = -1; /* the path owns no sd_index_ida entry; the index belongs to the head */
+ return 0;
+
+out_free_index:
+ ida_free(&sd_index_ida, sd_mpath_disk->disk_index);
+out_put_disk:
+ mpath_put_disk(sd_mpath_disk->mpath_disk);
+out_free_disk:
+ kfree(sd_mpath_disk);
+out_unlock:
+ mutex_unlock(&sd_mpath_disks_lock);
+ return error;
+}
+
+static void sd_mpath_remove(struct scsi_disk *sdkp)
+{
+ struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
+ struct scsi_device *sdp = sdkp->device;
+ struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
+ struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device;
+ struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk;
+ struct mpath_head *mpath_head = mpath_disk->mpath_head;
+ bool remove = false;
+
+ mpath_synchronize(mpath_head);
+
+ if (mpath_clear_current_path(mpath_head, mpath_device))
+ mpath_synchronize(mpath_head);
+
+ mpath_delete_device(mpath_head, mpath_device);
+
+ mutex_lock(&sd_mpath_disk->lock);
+ sd_mpath_disk->disk_count--;
+ /*
+ * Delayed removal not yet supported: once the last path is gone the
+ * disk is unlinked from sd_mpath_disks_list here and torn down below.
+ */
+ if (!sd_mpath_disk->disk_count) {
+ mutex_lock(&sd_mpath_disks_lock);
+ list_del_init(&sd_mpath_disk->entry);
+ mutex_unlock(&sd_mpath_disks_lock);
+
+ remove = true;
+ }
+ mutex_unlock(&sd_mpath_disk->lock);
+ mpath_remove_sysfs_link(mpath_disk, mpath_device);
+ mpath_device->disk = NULL;
+
+ if (remove) {
+ device_del(&sd_mpath_disk->dev);
+ mpath_remove_disk(mpath_disk);
+ }
+ sd_mpath_put_disk(sd_mpath_disk); /* drop the reference held on behalf of this path */
+}
+
+/*
+ * Always called for a failed probe, so we need to handle the case where
+ * some structures have not been set up: sdkp->sd_mpath_disk is NULL when
+ * sd_mpath_probe() did not attach a multipath disk, and then there is
+ * nothing to undo.
+ */
+static void sd_mpath_fail_probe(struct scsi_disk *sdkp)
+{
+ struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
+ struct scsi_mpath_device *scsi_mpath_dev;
+ struct mpath_device *mpath_device;
+ struct scsi_device *sdp = sdkp->device;
+ struct mpath_disk *mpath_disk;
+ bool remove = false;
+
+ if (!sd_mpath_disk)
+ return;
+
+ mpath_disk = sd_mpath_disk->mpath_disk;
+ scsi_mpath_dev = sdp->scsi_mpath_dev;
+ mpath_device = &scsi_mpath_dev->mpath_device;
+
+ mutex_lock(&sd_mpath_disk->lock);
+ sd_mpath_disk->disk_count--;
+ if (!sd_mpath_disk->disk_count) { /* this was the only path: tear the disk down below */
+ mutex_lock(&sd_mpath_disks_lock);
+ list_del_init(&sd_mpath_disk->entry);
+ mutex_unlock(&sd_mpath_disks_lock);
+
+ remove = true;
+ }
+ mutex_unlock(&sd_mpath_disk->lock);
+ mpath_device->disk = NULL;
+
+ if (remove) {
+ device_del(&sd_mpath_disk->dev);
+ mpath_remove_disk(mpath_disk);
+ }
+ sd_mpath_put_disk(sd_mpath_disk); /* drop the reference held on behalf of this path */
+}
+
#else /* CONFIG_SCSI_MULTIPATH */
+/*
+ * CONFIG_SCSI_MULTIPATH is disabled: no-op stubs so that sd_probe() and
+ * sd_remove() can call the multipath hooks without #ifdefs.
+ */
+static int sd_mpath_probe(struct scsi_disk *sdkp)
+{
+	return 0;
+}
+
+static void sd_mpath_remove(struct scsi_disk *sdkp)
+{
+}
+
+static void sd_mpath_fail_probe(struct scsi_disk *sdkp)
+{
+}
+
+static int sd_mpath_revalidate_head(struct scsi_disk *sdkp)
+{
+	return 0;
+}
+
+static void sd_mpath_add_disk(struct scsi_disk *sdkp)
+{
+}
#endif
/**
* sd_probe - called during driver initialization and whenever a
@@ -4198,22 +4514,33 @@ static int sd_probe(struct device *dev)
&sd_bio_compl_lkclass);
if (!gd)
goto out_free;
+ sdkp->disk = gd;
+ sdkp->device = sdp;
- index = ida_alloc(&sd_index_ida, GFP_KERNEL);
- if (index < 0) {
- sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n");
- goto out_put;
- }
+ if (sdp->scsi_mpath_dev) {
+ error = sd_mpath_probe(sdkp);
+ if (error)
+ goto out_put;
+ } else {
+ index = ida_alloc(&sd_index_ida, GFP_KERNEL);
+ if (index < 0) {
+ sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n");
+ goto out_put;
+ }
- error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
- if (error) {
- sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n");
- goto out_free_index;
+ error = sd_format_disk_name("sd", index, gd->disk_name,
+ DISK_NAME_LEN);
+ if (error) {
+ sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n");
+ goto out_free_index;
+ }
+ sdkp->index = index;
+
+ gd->major = sd_major((index & 0xf0) >> 4);
+ gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
+ gd->minors = SD_MINORS;
}
- sdkp->device = sdp;
- sdkp->disk = gd;
- sdkp->index = index;
sdkp->max_retries = SD_MAX_RETRIES;
atomic_set(&sdkp->openers, 0);
atomic_set(&sdkp->device->ioerr_cnt, 0);
@@ -4233,16 +4560,13 @@ static int sd_probe(struct device *dev)
error = device_add(&sdkp->disk_dev);
if (error) {
+ sd_mpath_fail_probe(sdkp);
put_device(&sdkp->disk_dev);
goto out;
}
dev_set_drvdata(dev, sdkp);
- gd->major = sd_major((index & 0xf0) >> 4);
- gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
- gd->minors = SD_MINORS;
-
gd->fops = &sd_fops;
gd->private_data = sdkp;
@@ -4260,6 +4584,12 @@ static int sd_probe(struct device *dev)
sd_revalidate_disk(gd);
+ if (sdp->scsi_mpath_dev) {
+ error = sd_mpath_revalidate_head(sdkp);
+ if (error)
+ sdev_printk(KERN_WARNING, sdp, "could not revalidate multipath limits\n");
+ }
+
if (sdp->removable) {
gd->flags |= GENHD_FL_REMOVABLE;
gd->events |= DISK_EVENT_MEDIA_CHANGE;
@@ -4274,11 +4604,15 @@ static int sd_probe(struct device *dev)
error = device_add_disk(dev, gd, NULL);
if (error) {
+ sd_mpath_fail_probe(sdkp);
device_unregister(&sdkp->disk_dev);
put_disk(gd);
goto out;
}
+ if (sdp->scsi_mpath_dev)
+ sd_mpath_add_disk(sdkp);
+
if (sdkp->security) {
sdkp->opal_dev = init_opal_dev(sdkp, &sd_sec_submit);
if (sdkp->opal_dev)
@@ -4292,7 +4626,8 @@ static int sd_probe(struct device *dev)
return 0;
out_free_index:
- ida_free(&sd_index_ida, index);
+ if (index >= 0)
+ ida_free(&sd_index_ida, index);
out_put:
put_disk(gd);
out_free:
@@ -4316,6 +4651,10 @@ static int sd_probe(struct device *dev)
static int sd_remove(struct device *dev)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
+ struct scsi_device *sdp = sdkp->device; // new code
+
+ if (sdp->scsi_mpath_dev)
+ sd_mpath_remove(sdkp);
scsi_autopm_get_device(sdkp->device);
@@ -4332,7 +4671,8 @@ static void scsi_disk_release(struct device *dev)
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
- ida_free(&sd_index_ida, sdkp->index);
+ if (sdkp->index >= 0)
+ ida_free(&sd_index_ida, sdkp->index);
put_device(&sdkp->device->sdev_gendev);
free_opal_dev(sdkp->opal_dev);
--
2.43.5
On Wed, Feb 25, 2026 at 03:36:24PM +0000, John Garry wrote:
> Add support to attach a multipath disk.
>
> We still allocate the gendisk per path, and this is required for the
> per-path submission. However, those gendisks are marked as hidden. Those
> disks are named sdX:Y, where X is the multipath disk index and Y is the
> per-path index.
>
> A global list of sd_mpath_disks is kept for matching scsi_device's.
>
> The multipath gendisk has the name and disk->major/minor set to minic a
> scsi_disk.
>
> The following is an example of relevant scsi_disk and block sysfs
> directories:
>
> $ ls -l /sys/block/ | grep sdc
> lrwxrwxrwx 1 root root 0 Feb 24 16:01 sdc -> ../devices/virtual/scsi_mpath_disk/0/sdc
> lrwxrwxrwx 1 root root 0 Feb 24 16:01 sdc:0 -> ../devices/platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0
> lrwxrwxrwx 1 root root 0 Feb 24 16:02 sdc:1 -> ../devices/platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1
>
> $ ls -l /sys/class/scsi_mpath_disk/0/
> total 0
> drwxr-xr-x 2 root root 0 Feb 24 16:03 power
> drwxr-xr-x 11 root root 0 Feb 24 16:01 sdc
> lrwxrwxrwx 1 root root 0 Feb 24 16:01 subsystem -> ../../../../class/scsi_mpath_disk
> -rw-r--r-- 1 root root 4096 Feb 24 16:01 uevent
>
> $ ls -l /sys/class/scsi_mpath_disk/0/sdc/multipath/
> total 0
> lrwxrwxrwx 1 root root 0 Feb 24 16:20 sdc:0 -> ../../../../../platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0
> lrwxrwxrwx 1 root root 0 Feb 24 16:20 sdc:1 -> ../../../../../platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1
>
>
> $ ls -l /dev/sdc*
> brw-rw---- 1 root disk 8, 32 Feb 24 16:01 /dev/sdc
> brw-rw---- 1 root disk 8, 33 Feb 24 16:01 /dev/sdc1
> brw-rw---- 1 root disk 8, 34 Feb 24 16:01 /dev/sdc2
>
>
> $ lsblk /dev/sdc
> NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
> sdc 8:32 0 600M 0 disk
> |-sdc1 8:33 0 9M 0 part
> `-sdc2 8:34 0 568M 0 part
>
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
> drivers/scsi/sd.c | 376 +++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 358 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 9617878b53ec6..409c0937764d9 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -117,12 +117,33 @@ static DEFINE_IDA(sd_index_ida);
> static mempool_t *sd_page_pool;
> static struct lock_class_key sd_bio_compl_lkclass;
> #ifdef CONFIG_SCSI_MULTIPATH
> +static LIST_HEAD(sd_mpath_disks_list);
> +static DEFINE_MUTEX(sd_mpath_disks_lock);
> +
> struct sd_mpath_disk {
> + struct device dev;
> + int disk_index;
> + int disk_count;
> + struct list_head entry;
> + struct mutex lock;
> struct mpath_disk *mpath_disk;
> + struct scsi_mpath_head *scsi_mpath_head;
> };
>
> static void sd_mpath_disk_release(struct device *dev)
> {
> + struct sd_mpath_disk *sd_mpath_disk =
> + container_of(dev, struct sd_mpath_disk, dev);
> + struct scsi_mpath_head *scsi_mpath_head =
> + sd_mpath_disk->scsi_mpath_head;
> + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk;
> +
> + mpath_put_disk(mpath_disk);
> +
> + ida_free(&sd_index_ida, sd_mpath_disk->disk_index);
> + scsi_mpath_put_head(scsi_mpath_head);
> +
> + kfree(sd_mpath_disk);
> }
>
> static const struct class sd_mpath_disk_class = {
> @@ -4144,7 +4165,302 @@ static const struct scsi_mpath_pr_ops sd_mpath_pr_ops = {
> .pr_read_keys = sd_mpath_pr_read_keys,
> .pr_read_reservation = sd_mpath_pr_read_reservation,
> };
> +
> +static int sd_mpath_revalidate_head(struct scsi_disk *sdkp)
> +{
> + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
> + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk;;
> + struct gendisk *disk = mpath_disk->disk;
> + struct queue_limits *sdkp_lim = &sdkp->disk->queue->limits;
> + struct queue_limits lim;
> + unsigned int memflags;
> + int ret;
> +
> + lim = queue_limits_start_update(disk->queue);
> + memflags = blk_mq_freeze_queue(disk->queue);
> +
> + lim.logical_block_size = sdkp_lim->logical_block_size;
> + lim.physical_block_size = sdkp_lim->physical_block_size;
> + lim.io_min = sdkp_lim->io_min;
> + lim.io_opt = sdkp_lim->io_opt;
> +
> + queue_limits_stack_bdev(&lim, sdkp->disk->part0, 0,
> + disk->disk_name);
> +
> + /* TODO: setup integrity limits */
> + lim.max_write_streams = sdkp_lim->max_write_streams;
> + lim.write_stream_granularity = sdkp_lim->write_stream_granularity;
> + ret = queue_limits_commit_update(disk->queue, &lim);
> +
> + set_capacity_and_notify(disk, get_capacity(sdkp->disk));
> +
> + blk_mq_unfreeze_queue(disk->queue, memflags);
> +
> + return ret;
> +}
> +static int sd_mpath_get_disk(struct sd_mpath_disk *sd_mpath_disk)
> +{
> + if (!get_device(&sd_mpath_disk->dev))
> + return -ENXIO;
> + return 0;
> +}
> +
> +static void sd_mpath_put_disk(struct sd_mpath_disk *sd_mpath_disk)
> +{
> + put_device(&sd_mpath_disk->dev);
> +}
> +
> +static struct sd_mpath_disk *sd_mpath_find_disk(struct scsi_device *sdp)
> +{
> + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
> + struct sd_mpath_disk *sd_mpath_disk;
> + int ret;
> +
> + mutex_lock(&sd_mpath_disks_lock);
> + list_for_each_entry(sd_mpath_disk, &sd_mpath_disks_list, entry) {
> + struct scsi_mpath_head *scsi_mpath_head;
> + struct mpath_disk *mpath_disk;
> + struct mpath_head *mpath_head;
> +
> + ret = sd_mpath_get_disk(sd_mpath_disk);
> + if (ret)
> + continue;
> + mpath_disk = sd_mpath_disk->mpath_disk;
> + mpath_head = mpath_disk->mpath_head;
> + scsi_mpath_head = mpath_head->drvdata;
> +
> + if (strncmp(scsi_mpath_head->wwid,
> + scsi_mpath_dev->device_id_str,
> + SCSI_MPATH_DEVICE_ID_LEN) == 0) {
> +
> + mutex_unlock(&sd_mpath_disks_lock);
> + return sd_mpath_disk;
> + }
> + sd_mpath_put_disk(sd_mpath_disk);
> + }
> +
> + return NULL;
> +}
> +
> +static void sd_mpath_add_disk(struct scsi_disk *sdkp)
> +{
> + struct scsi_device *sdp = sdkp->device;
> + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
> + struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device;
> + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk;
> + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk;
> + struct mpath_head *mpath_head = mpath_disk->mpath_head;
> +
> + mpath_device->disk = sdkp->disk;
> + mpath_add_device(mpath_head, mpath_device);
> + mpath_device_set_live(mpath_disk, mpath_device);
> +}
> +
> +static int sd_mpath_probe(struct scsi_disk *sdkp)
> +{
> + struct scsi_device *sdp = sdkp->device;
> + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
> + struct device *dma_dev = sdp->host->dma_dev;
> + struct scsi_mpath_head *scsi_mpath_head =
> + scsi_mpath_dev->scsi_mpath_head;
> + struct sd_mpath_disk *sd_mpath_disk;
> + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
> + struct queue_limits lim;
> + struct gendisk *disk;
> + int error;
> +
> + /*
> + * sd_mpath_disks_list is kept locked if no disk found.
> + * Otherwise an extra reference is taken.
> + */
Again, I personally think the logic is easier to follow when all the
locking isn't split over multiple functions.
> + sd_mpath_disk = sd_mpath_find_disk(sdp);
> + if (sd_mpath_disk) {
> + mutex_lock(&sd_mpath_disk->lock);
> + sd_mpath_disk->disk_count++;
> + mutex_unlock(&sd_mpath_disk->lock);
> + goto found;
> + }
> +
> + sd_mpath_disk = kzalloc(sizeof(*sd_mpath_disk), GFP_KERNEL);
> + if (!sd_mpath_disk) {
> + error = -ENOMEM;
> + goto out_unlock;
> + }
> +
> + sd_mpath_disk->scsi_mpath_head = scsi_mpath_head;
> + device_initialize(&sd_mpath_disk->dev);
> + mutex_init(&sd_mpath_disk->lock);
> + sd_mpath_disk->dev.class = &sd_mpath_disk_class;
> +
> + blk_set_stacking_limits(&lim);
> + lim.dma_alignment = 3;
> + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT |
> + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES;
> +
> + sd_mpath_disk->mpath_disk = mpath_alloc_head_disk(&lim,
> + dev_to_node(dma_dev));
> + if (!sd_mpath_disk->mpath_disk) {
> + error = -ENOMEM;
> + goto out_free_disk;
> + }
> + disk = sd_mpath_disk->mpath_disk->disk;
> + mpath_get_head(mpath_head); /* undone in mpath_free_disk() */
> +
> + sd_mpath_disk->mpath_disk->mpath_head = mpath_head;
> + sd_mpath_disk->mpath_disk->parent = &sd_mpath_disk->dev;
> +
> + error = ida_alloc(&sd_index_ida, GFP_KERNEL);
> + if (error < 0) {
> + sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n");
> + goto out_put_disk;
> + }
> + sd_mpath_disk->disk_index = error;
> + error = sd_format_disk_name("sd", sd_mpath_disk->disk_index,
> + disk->disk_name, DISK_NAME_LEN);
> + if (error)
> + goto out_free_index;
> +
> + error = dev_set_name(&sd_mpath_disk->dev, "%s",
> + dev_name(&scsi_mpath_head->dev));
> + if (error)
> + goto out_free_index;
> +
> + /* undone in sd_mpath_disk_release() */
> + scsi_mpath_get_head(scsi_mpath_head);
> +
> + error = device_add(&sd_mpath_disk->dev);
> + if (error) {
> + put_device(&sd_mpath_disk->dev);
> + goto out_unlock;
We should clean up when we fail here, instead of just unlocking without
fully setting things up.
-Ben
> + }
> +
> + list_add_tail(&sd_mpath_disk->entry, &sd_mpath_disks_list);
> + disk->major = sd_major((sd_mpath_disk->disk_index & 0xf0) >> 4);
> + disk->first_minor = ((sd_mpath_disk->disk_index & 0xf) << 4) |
> + (sd_mpath_disk->disk_index & 0xfff00);
> + disk->minors = SD_MINORS;
> +
> + sd_mpath_disk->disk_count = 1;
> + mutex_unlock(&sd_mpath_disks_lock);
> +
> +found:
> + sdkp->sd_mpath_disk = sd_mpath_disk;
> + sdkp->disk->flags |= GENHD_FL_HIDDEN;
> + snprintf(sdkp->disk->disk_name, DISK_NAME_LEN, "%s:%d",
> + sd_mpath_disk->mpath_disk->disk->disk_name,
> + scsi_mpath_dev->index);
> +
> + sdkp->index = -1;
> + return 0;
> +
> +out_free_index:
> + ida_free(&sd_index_ida, sd_mpath_disk->disk_index);
> +out_put_disk:
> + mpath_put_disk(sd_mpath_disk->mpath_disk);
> +out_free_disk:
> + kfree(sd_mpath_disk);
> +out_unlock:
> + mutex_unlock(&sd_mpath_disks_lock);
> + return error;
> +}
On 10/03/2026 02:40, Benjamin Marzinski wrote:
>> +static int sd_mpath_probe(struct scsi_disk *sdkp)
>> +{
>> + struct scsi_device *sdp = sdkp->device;
>> + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
>> + struct device *dma_dev = sdp->host->dma_dev;
>> + struct scsi_mpath_head *scsi_mpath_head =
>> + scsi_mpath_dev->scsi_mpath_head;
>> + struct sd_mpath_disk *sd_mpath_disk;
>> + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
>> + struct queue_limits lim;
>> + struct gendisk *disk;
>> + int error;
>> +
>> + /*
>> + * sd_mpath_disks_list is kept locked if no disk found.
>> + * Otherwise an extra reference is taken.
>> + */
> Again, I personally think the logic is easier to follow when all the
> locking isn't split over multiple functions.
Sure, but I am considering removing the mpath_disk structure, so things
may change here anyway. Removing mpath_disk should simplify things for
the nvme driver transition.
>
>> + sd_mpath_disk = sd_mpath_find_disk(sdp);
>> + if (sd_mpath_disk) {
>> + mutex_lock(&sd_mpath_disk->lock);
>> + sd_mpath_disk->disk_count++;
>> + mutex_unlock(&sd_mpath_disk->lock);
>> + goto found;
>> + }
>> +
>> + sd_mpath_disk = kzalloc(sizeof(*sd_mpath_disk), GFP_KERNEL);
>> + if (!sd_mpath_disk) {
>> + error = -ENOMEM;
>> + goto out_unlock;
>> + }
>> +
>> + sd_mpath_disk->scsi_mpath_head = scsi_mpath_head;
>> + device_initialize(&sd_mpath_disk->dev);
>> + mutex_init(&sd_mpath_disk->lock);
>> + sd_mpath_disk->dev.class = &sd_mpath_disk_class;
>> +
>> + blk_set_stacking_limits(&lim);
>> + lim.dma_alignment = 3;
>> + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT |
>> + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES;
>> +
>> + sd_mpath_disk->mpath_disk = mpath_alloc_head_disk(&lim,
>> + dev_to_node(dma_dev));
>> + if (!sd_mpath_disk->mpath_disk) {
>> + error = -ENOMEM;
>> + goto out_free_disk;
>> + }
>> + disk = sd_mpath_disk->mpath_disk->disk;
>> + mpath_get_head(mpath_head); /* undone in mpath_free_disk() */
>> +
>> + sd_mpath_disk->mpath_disk->mpath_head = mpath_head;
>> + sd_mpath_disk->mpath_disk->parent = &sd_mpath_disk->dev;
>> +
>> + error = ida_alloc(&sd_index_ida, GFP_KERNEL);
>> + if (error < 0) {
>> + sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n");
>> + goto out_put_disk;
>> + }
>> + sd_mpath_disk->disk_index = error;
>> + error = sd_format_disk_name("sd", sd_mpath_disk->disk_index,
>> + disk->disk_name, DISK_NAME_LEN);
>> + if (error)
>> + goto out_free_index;
>> +
>> + error = dev_set_name(&sd_mpath_disk->dev, "%s",
>> + dev_name(&scsi_mpath_head->dev));
>> + if (error)
>> + goto out_free_index;
>> +
>> + /* undone in sd_mpath_disk_release() */
>> + scsi_mpath_get_head(scsi_mpath_head);
>> +
>> + error = device_add(&sd_mpath_disk->dev);
>> + if (error) {
>> + put_device(&sd_mpath_disk->dev);
>> + goto out_unlock;
> We should clean up when we fail here, instead of just unlocking without
> fully setting things up.
I think that the release function is called from put_device(), which
should do the class tidy up. Something similar is done in sd_probe() for
the disk_dev.
Thanks,
John
On Tue, Mar 10, 2026 at 10:12:07AM +0000, John Garry wrote:
> On 10/03/2026 02:40, Benjamin Marzinski wrote:
> > > +static int sd_mpath_probe(struct scsi_disk *sdkp)
> > > +{
> > > + struct scsi_device *sdp = sdkp->device;
> > > + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev;
> > > + struct device *dma_dev = sdp->host->dma_dev;
> > > + struct scsi_mpath_head *scsi_mpath_head =
> > > + scsi_mpath_dev->scsi_mpath_head;
> > > + struct sd_mpath_disk *sd_mpath_disk;
> > > + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head;
> > > + struct queue_limits lim;
> > > + struct gendisk *disk;
> > > + int error;
> > > +
> > > + /*
> > > + * sd_mpath_disks_list is kept locked if no disk found.
> > > + * Otherwise an extra reference is taken.
> > > + */
> > Again, I personally think the logic is easier to follow when all the
> > locking isn't split over multiple functions.
>
> Sure, but I am considering removing the mpath_disk structure, so things may
> change here anyway. Removing mpath_disk should simplify things for the nvme
> driver transition.
Sure.
>
> >
> > > + sd_mpath_disk = sd_mpath_find_disk(sdp);
> > > + if (sd_mpath_disk) {
> > > + mutex_lock(&sd_mpath_disk->lock);
> > > + sd_mpath_disk->disk_count++;
> > > + mutex_unlock(&sd_mpath_disk->lock);
> > > + goto found;
> > > + }
> > > +
> > > + sd_mpath_disk = kzalloc(sizeof(*sd_mpath_disk), GFP_KERNEL);
> > > + if (!sd_mpath_disk) {
> > > + error = -ENOMEM;
> > > + goto out_unlock;
> > > + }
> > > +
> > > + sd_mpath_disk->scsi_mpath_head = scsi_mpath_head;
> > > + device_initialize(&sd_mpath_disk->dev);
> > > + mutex_init(&sd_mpath_disk->lock);
> > > + sd_mpath_disk->dev.class = &sd_mpath_disk_class;
> > > +
> > > + blk_set_stacking_limits(&lim);
> > > + lim.dma_alignment = 3;
> > > + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT |
> > > + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES;
> > > +
> > > + sd_mpath_disk->mpath_disk = mpath_alloc_head_disk(&lim,
> > > + dev_to_node(dma_dev));
> > > + if (!sd_mpath_disk->mpath_disk) {
> > > + error = -ENOMEM;
> > > + goto out_free_disk;
> > > + }
> > > + disk = sd_mpath_disk->mpath_disk->disk;
> > > + mpath_get_head(mpath_head); /* undone in mpath_free_disk() */
> > > +
> > > + sd_mpath_disk->mpath_disk->mpath_head = mpath_head;
> > > + sd_mpath_disk->mpath_disk->parent = &sd_mpath_disk->dev;
> > > +
> > > + error = ida_alloc(&sd_index_ida, GFP_KERNEL);
> > > + if (error < 0) {
> > > + sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n");
> > > + goto out_put_disk;
> > > + }
> > > + sd_mpath_disk->disk_index = error;
> > > + error = sd_format_disk_name("sd", sd_mpath_disk->disk_index,
> > > + disk->disk_name, DISK_NAME_LEN);
> > > + if (error)
> > > + goto out_free_index;
> > > +
> > > + error = dev_set_name(&sd_mpath_disk->dev, "%s",
> > > + dev_name(&scsi_mpath_head->dev));
> > > + if (error)
> > > + goto out_free_index;
> > > +
> > > + /* undone in sd_mpath_disk_release() */
> > > + scsi_mpath_get_head(scsi_mpath_head);
> > > +
> > > + error = device_add(&sd_mpath_disk->dev);
> > > + if (error) {
> > > + put_device(&sd_mpath_disk->dev);
> > > + goto out_unlock;
> > We should clean up when we fail here, instead of just unlocking without
> > fully setting things up.
>
> I think that the release function is called from put_device(), which should
> do the class tidy up. Something similar is done in sd_probe() for the
> disk_dev.
Oops. You are correct.
-Ben
> Thanks,
> John
© 2016 - 2026 Red Hat, Inc.