[PATCH 5/8] scsi: scsi-multipath: Add basic ALUA support

John Garry posted 8 patches 4 weeks, 1 day ago
[PATCH 5/8] scsi: scsi-multipath: Add basic ALUA support
Posted by John Garry 4 weeks, 1 day ago
Add basic support just to get the per-port group state.

This support does not account of state transitioning, sdev port group
reconfiguration, etc, required for full support.

libmultipath callbacks scsi_mpath_is_optimized() and
scsi_mpath_is_disabled() are updated to take account of the ALUA-provided
path information.

As before, for no ALUA support (and scsi_multipath_always on) we assume
that the paths are all optimized.

Much of this code in scsi_mpath_alua_init() is copied from scsi_dh_alua.c,
originally authored by Hannes Reinecke.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 drivers/scsi/scsi_multipath.c | 163 ++++++++++++++++++++++++++++++++--
 include/scsi/scsi_multipath.h |   3 +
 2 files changed, 160 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
index 1489c7e979167..0a314080bf0a5 100644
--- a/drivers/scsi/scsi_multipath.c
+++ b/drivers/scsi/scsi_multipath.c
@@ -4,6 +4,7 @@
  *
  */
 
+#include <scsi/scsi_alua.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_driver.h>
 #include <scsi/scsi_proto.h>
@@ -346,18 +347,29 @@ static bool scsi_mpath_is_disabled(struct mpath_device *mpath_device)
 				to_scsi_mpath_device(mpath_device);
 	struct scsi_device *sdev = scsi_mpath_dev->sdev;
 	enum scsi_device_state sdev_state = sdev->sdev_state;
+	int alua_state = scsi_mpath_dev->alua_state;
 
 	if (sdev_state == SDEV_RUNNING || sdev_state == SDEV_CANCEL)
 		return false;
 
-	return true;
+	if (alua_state == SCSI_ACCESS_STATE_OPTIMAL ||
+	    alua_state == SCSI_ACCESS_STATE_ACTIVE)
+		return true;
+
+	return false;
 }
 
 static bool scsi_mpath_is_optimized(struct mpath_device *mpath_device)
 {
+	struct scsi_mpath_device *scsi_mpath_dev =
+				to_scsi_mpath_device(mpath_device);
+
 	if (scsi_mpath_is_disabled(mpath_device))
 		return false;
-	return true;
+	if (scsi_mpath_dev->alua_state == SCSI_ACCESS_STATE_OPTIMAL)
+		return true;
+	return false;
+
 }
 
 /* Until we have ALUA support, we're always optimised */
@@ -366,7 +378,7 @@ static enum mpath_access_state scsi_mpath_get_access_state(
 {
 	if (scsi_mpath_is_disabled(mpath_device))
 		return MPATH_STATE_INVALID;
-	return MPATH_STATE_OPTIMIZED;
+	return scsi_mpath_is_optimized(mpath_device);
 }
 
 static bool scsi_mpath_available_path(struct mpath_device *mpath_device, bool *available)
@@ -579,16 +591,147 @@ static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
 	sdev->scsi_mpath_dev = NULL;
 }
 
+static int scsi_mpath_alua_init(struct scsi_device *sdev)
+{
+	struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
+	struct scsi_sense_hdr sense_hdr;
+	int len, k, off, bufflen = ALUA_RTPG_SIZE;
+	unsigned char *desc, *buff;
+	unsigned int tpg_desc_tbl_off;
+	int group_id, rel_port = -1;
+	bool ext_hdr_unsupp = false;
+	int ret;
+
+	group_id = scsi_vpd_tpg_id(sdev, &rel_port);
+	if (group_id < 0) {
+		/*
+		 * Internal error; TPGS supported but required
+		 * VPD identification descriptors not present.
+		 * Disable ALUA support.
+		 */
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: No target port descriptors found\n",
+			    __func__);
+		return -EIO;
+	}
+
+	buff = kzalloc(bufflen, GFP_KERNEL);
+	if (!buff)
+		return -ENOMEM;
+ retry:
+	ret = submit_rtpg(sdev, buff, bufflen, &sense_hdr,
+				ext_hdr_unsupp);
+
+	if (ret) {
+		if (ret < 0 || !scsi_sense_valid(&sense_hdr)) {
+			sdev_printk(KERN_INFO, sdev,
+				    "%s: rtpg failed, result %d\n",
+				    __func__, ret);
+			kfree(buff);
+			if (ret < 0)
+				return -EBUSY;
+			if (host_byte(ret) == DID_NO_CONNECT)
+				return -ENODEV;
+			return -EIO;
+		}
+
+		/*
+		 * submit_rtpg() has failed on existing arrays
+		 * when requesting extended header info, and
+		 * the array doesn't support extended headers,
+		 * even though it shouldn't according to T10.
+		 * The retry without rtpg_ext_hdr_req set
+		 * handles this.
+		 * Note:  some arrays return a sense key of ILLEGAL_REQUEST
+		 * with ASC 00h if they don't support the extended header.
+		 */
+		if (ext_hdr_unsupp &&
+		    sense_hdr.sense_key == ILLEGAL_REQUEST) {
+			ext_hdr_unsupp = true;
+			goto retry;
+		}
+		/*
+		 * If the array returns with 'ALUA state transition'
+		 * sense code here it cannot return RTPG data during
+		 * transition. So set the state to 'transitioning' directly.
+		 */
+		if (sense_hdr.sense_key == NOT_READY &&
+		    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
+			goto out;
+
+		/*
+		 * Retry on any other UNIT ATTENTION occurred.
+		 */
+		if (sense_hdr.sense_key == UNIT_ATTENTION) {
+			scsi_print_sense_hdr(sdev, __func__, &sense_hdr);
+			kfree(buff);
+			return -EAGAIN;
+		}
+		sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
+			    __func__);
+		scsi_print_sense_hdr(sdev, __func__, &sense_hdr);
+		kfree(buff);
+		return -EIO;
+	}
+
+	len = get_unaligned_be32(&buff[0]) + 4;
+
+	if (len > bufflen) {
+		/* Resubmit with the correct length */
+		kfree(buff);
+		bufflen = len;
+		buff = kmalloc(bufflen, GFP_KERNEL);
+		if (!buff) {
+			/* Temporary failure, bypass */
+			return -EBUSY;
+		}
+		goto retry;
+	}
+
+	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
+		tpg_desc_tbl_off = 8;
+	else
+		tpg_desc_tbl_off = 4;
+
+	for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
+	     k < len;
+	     k += off, desc += off) {
+		u16 group_id_found = get_unaligned_be16(&desc[2]);
+
+		if (group_id_found == group_id) {
+			int valid_states, state, pref;
+
+			state = desc[0] & 0x0f;
+			pref = desc[0] >> 7;
+			valid_states = desc[1];
+
+			alua_print_info(sdev, group_id, state, pref, valid_states);
+
+			scsi_mpath_dev->alua_state = state;
+			scsi_mpath_dev->alua_pref = pref;
+			scsi_mpath_dev->alua_valid_states = valid_states;
+			goto out;
+		}
+
+		off = 8 + (desc[7] * 4);
+	}
+
+out:
+	kfree(buff);
+	return 0;
+}
+
 int scsi_mpath_dev_alloc(struct scsi_device *sdev)
 {
 	struct scsi_mpath_head *scsi_mpath_head;
-	int ret;
+	int ret, tpgs;
 
 	if (!scsi_multipath)
 		return 0;
 
-	if (!scsi_device_tpgs(sdev) && !scsi_multipath_always) {
-		sdev_printk(KERN_NOTICE, sdev, "tpgs are required for multipath support\n");
+	tpgs = alua_check_tpgs(sdev);
+	if (!(tpgs & TPGS_MODE_IMPLICIT) && !scsi_multipath_always) {
+		sdev_printk(KERN_DEBUG, sdev, "IMPLICIT TPGS are required for multipath support\n");
 		return 0;
 	}
 
@@ -622,6 +765,14 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev)
 	sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
 
 found:
+	if (tpgs & TPGS_MODE_IMPLICIT) {
+		ret = scsi_mpath_alua_init(sdev);
+		if (ret)
+			goto out_put_head;
+	} else {
+		sdev->scsi_mpath_dev->alua_state = SCSI_ACCESS_STATE_OPTIMAL;
+	}
+
 	sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida, GFP_KERNEL);
 	if (sdev->scsi_mpath_dev->index < 0) {
 		ret = sdev->scsi_mpath_dev->index;
diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
index 2011447f482d6..7c7ee2fb7def7 100644
--- a/include/scsi/scsi_multipath.h
+++ b/include/scsi/scsi_multipath.h
@@ -38,6 +38,9 @@ struct scsi_mpath_device {
 	int			index;
 	atomic_t		nr_active;
 	struct scsi_mpath_head	*scsi_mpath_head;
+	int			alua_state;
+	int			alua_pref;
+	int			alua_valid_states;
 
 	char			device_id_str[SCSI_MPATH_DEVICE_ID_LEN];
 };
-- 
2.43.5
Re: [PATCH 5/8] scsi: scsi-multipath: Add basic ALUA support
Posted by Benjamin Marzinski 3 weeks, 5 days ago
On Tue, Mar 10, 2026 at 11:49:22AM +0000, John Garry wrote:
> Add basic support just to get the per-port group state.
> 
> This support does not account of state transitioning, sdev port group
> reconfiguration, etc, required for full support.
> 
> libmultipath callbacks scsi_mpath_is_optimized() and
> scsi_mpath_is_disabled() are updated to take account of the ALUA-provided
> path information.
> 
> As before, for no ALUA support (and scsi_multipath_always on) we assume
> that the paths are all optimized.
> 
> Much of this code in scsi_mpath_alua_init() is copied from scsi_dh_alua.c,
> originally authored by Hannes Reinecke.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  drivers/scsi/scsi_multipath.c | 163 ++++++++++++++++++++++++++++++++--
>  include/scsi/scsi_multipath.h |   3 +
>  2 files changed, 160 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
> index 1489c7e979167..0a314080bf0a5 100644
> --- a/drivers/scsi/scsi_multipath.c
> +++ b/drivers/scsi/scsi_multipath.c
> @@ -4,6 +4,7 @@
>   *
>   */
>  
> +#include <scsi/scsi_alua.h>
>  #include <scsi/scsi_cmnd.h>
>  #include <scsi/scsi_driver.h>
>  #include <scsi/scsi_proto.h>
> @@ -346,18 +347,29 @@ static bool scsi_mpath_is_disabled(struct mpath_device *mpath_device)
>  				to_scsi_mpath_device(mpath_device);
>  	struct scsi_device *sdev = scsi_mpath_dev->sdev;
>  	enum scsi_device_state sdev_state = sdev->sdev_state;
> +	int alua_state = scsi_mpath_dev->alua_state;
>  
>  	if (sdev_state == SDEV_RUNNING || sdev_state == SDEV_CANCEL)
>  		return false;
>  
> -	return true;
> +	if (alua_state == SCSI_ACCESS_STATE_OPTIMAL ||
> +	    alua_state == SCSI_ACCESS_STATE_ACTIVE)
> +		return true;
> +
> +	return false;
>  }
>  
>  static bool scsi_mpath_is_optimized(struct mpath_device *mpath_device)
>  {
> +	struct scsi_mpath_device *scsi_mpath_dev =
> +				to_scsi_mpath_device(mpath_device);
> +
>  	if (scsi_mpath_is_disabled(mpath_device))
>  		return false;
> -	return true;
> +	if (scsi_mpath_dev->alua_state == SCSI_ACCESS_STATE_OPTIMAL)
> +		return true;
> +	return false;
> +
>  }
>  
>  /* Until we have ALUA support, we're always optimised */
> @@ -366,7 +378,7 @@ static enum mpath_access_state scsi_mpath_get_access_state(
>  {
>  	if (scsi_mpath_is_disabled(mpath_device))
>  		return MPATH_STATE_INVALID;
> -	return MPATH_STATE_OPTIMIZED;
> +	return scsi_mpath_is_optimized(mpath_device);
>  }
>  
>  static bool scsi_mpath_available_path(struct mpath_device *mpath_device, bool *available)
> @@ -579,16 +591,147 @@ static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
>  	sdev->scsi_mpath_dev = NULL;
>  }
>  
> +static int scsi_mpath_alua_init(struct scsi_device *sdev)
> +{
> +	struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
> +	struct scsi_sense_hdr sense_hdr;
> +	int len, k, off, bufflen = ALUA_RTPG_SIZE;
> +	unsigned char *desc, *buff;
> +	unsigned int tpg_desc_tbl_off;
> +	int group_id, rel_port = -1;
> +	bool ext_hdr_unsupp = false;
> +	int ret;
> +
> +	group_id = scsi_vpd_tpg_id(sdev, &rel_port);
> +	if (group_id < 0) {
> +		/*
> +		 * Internal error; TPGS supported but required
> +		 * VPD identification descriptors not present.
> +		 * Disable ALUA support.
> +		 */
> +		sdev_printk(KERN_INFO, sdev,
> +			    "%s: No target port descriptors found\n",
> +			    __func__);
> +		return -EIO;
> +	}
> +
> +	buff = kzalloc(bufflen, GFP_KERNEL);
> +	if (!buff)
> +		return -ENOMEM;
> + retry:
> +	ret = submit_rtpg(sdev, buff, bufflen, &sense_hdr,
> +				ext_hdr_unsupp);
> +
> +	if (ret) {
> +		if (ret < 0 || !scsi_sense_valid(&sense_hdr)) {
> +			sdev_printk(KERN_INFO, sdev,
> +				    "%s: rtpg failed, result %d\n",
> +				    __func__, ret);
> +			kfree(buff);
> +			if (ret < 0)
> +				return -EBUSY;
> +			if (host_byte(ret) == DID_NO_CONNECT)
> +				return -ENODEV;
> +			return -EIO;
> +		}
> +
> +		/*
> +		 * submit_rtpg() has failed on existing arrays
> +		 * when requesting extended header info, and
> +		 * the array doesn't support extended headers,
> +		 * even though it shouldn't according to T10.
> +		 * The retry without rtpg_ext_hdr_req set
> +		 * handles this.
> +		 * Note:  some arrays return a sense key of ILLEGAL_REQUEST
> +		 * with ASC 00h if they don't support the extended header.
> +		 */
> +		if (ext_hdr_unsupp &&
> +		    sense_hdr.sense_key == ILLEGAL_REQUEST) {
> +			ext_hdr_unsupp = true;
> +			goto retry;
> +		}
> +		/*
> +		 * If the array returns with 'ALUA state transition'
> +		 * sense code here it cannot return RTPG data during
> +		 * transition. So set the state to 'transitioning' directly.
> +		 */
> +		if (sense_hdr.sense_key == NOT_READY &&
> +		    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
> +			goto out;

This check is odd. First, we don't set the state to 'transitioning' like
the comment says. We don't set alua_state at all, which ends up meaning
that it stays as 0 (SCSI_ACCESS_STATE_OPTIMAL). It seems like we should
explicitly set it and make the comment reflect that, if just to aid
understanding of the logic.

Nitpick: Also, this is the only place where we goto out. All the other
checks individually free the buffer and return directly. I realize that
all the other checks that exit early are errors, but it seems like we
could just return a variable at the end of the function.

> +
> +		/*
> +		 * Retry on any other UNIT ATTENTION occurred.
> +		 */
> +		if (sense_hdr.sense_key == UNIT_ATTENTION) {
> +			scsi_print_sense_hdr(sdev, __func__, &sense_hdr);
> +			kfree(buff);
> +			return -EAGAIN;
> +		}

If we get a UNIT ATTENTION, we end up failing scsi_mpath_dev_alloc(),
not retrying. Aside from the comment being wrong, it seems very brittle
to fail here, just because we got a UNIT ATTENTION.

-Ben

> +		sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
> +			    __func__);
> +		scsi_print_sense_hdr(sdev, __func__, &sense_hdr);
> +		kfree(buff);
> +		return -EIO;
> +	}
> +
> +	len = get_unaligned_be32(&buff[0]) + 4;
> +
> +	if (len > bufflen) {
> +		/* Resubmit with the correct length */
> +		kfree(buff);
> +		bufflen = len;
> +		buff = kmalloc(bufflen, GFP_KERNEL);
> +		if (!buff) {
> +			/* Temporary failure, bypass */
> +			return -EBUSY;
> +		}
> +		goto retry;
> +	}
> +
> +	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
> +		tpg_desc_tbl_off = 8;
> +	else
> +		tpg_desc_tbl_off = 4;
> +
> +	for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
> +	     k < len;
> +	     k += off, desc += off) {
> +		u16 group_id_found = get_unaligned_be16(&desc[2]);
> +
> +		if (group_id_found == group_id) {
> +			int valid_states, state, pref;
> +
> +			state = desc[0] & 0x0f;
> +			pref = desc[0] >> 7;
> +			valid_states = desc[1];
> +
> +			alua_print_info(sdev, group_id, state, pref, valid_states);
> +
> +			scsi_mpath_dev->alua_state = state;
> +			scsi_mpath_dev->alua_pref = pref;
> +			scsi_mpath_dev->alua_valid_states = valid_states;
> +			goto out;
> +		}
> +
> +		off = 8 + (desc[7] * 4);
> +	}
> +
> +out:
> +	kfree(buff);
> +	return 0;
> +}
> +
>  int scsi_mpath_dev_alloc(struct scsi_device *sdev)
>  {
>  	struct scsi_mpath_head *scsi_mpath_head;
> -	int ret;
> +	int ret, tpgs;
>  
>  	if (!scsi_multipath)
>  		return 0;
>  
> -	if (!scsi_device_tpgs(sdev) && !scsi_multipath_always) {
> -		sdev_printk(KERN_NOTICE, sdev, "tpgs are required for multipath support\n");
> +	tpgs = alua_check_tpgs(sdev);
> +	if (!(tpgs & TPGS_MODE_IMPLICIT) && !scsi_multipath_always) {
> +		sdev_printk(KERN_DEBUG, sdev, "IMPLICIT TPGS are required for multipath support\n");
>  		return 0;
>  	}
>  
> @@ -622,6 +765,14 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev)
>  	sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
>  
>  found:
> +	if (tpgs & TPGS_MODE_IMPLICIT) {
> +		ret = scsi_mpath_alua_init(sdev);
> +		if (ret)
> +			goto out_put_head;
> +	} else {
> +		sdev->scsi_mpath_dev->alua_state = SCSI_ACCESS_STATE_OPTIMAL;
> +	}
> +
>  	sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida, GFP_KERNEL);
>  	if (sdev->scsi_mpath_dev->index < 0) {
>  		ret = sdev->scsi_mpath_dev->index;
> diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
> index 2011447f482d6..7c7ee2fb7def7 100644
> --- a/include/scsi/scsi_multipath.h
> +++ b/include/scsi/scsi_multipath.h
> @@ -38,6 +38,9 @@ struct scsi_mpath_device {
>  	int			index;
>  	atomic_t		nr_active;
>  	struct scsi_mpath_head	*scsi_mpath_head;
> +	int			alua_state;
> +	int			alua_pref;
> +	int			alua_valid_states;
>  
>  	char			device_id_str[SCSI_MPATH_DEVICE_ID_LEN];
>  };
> -- 
> 2.43.5
Re: [PATCH 5/8] scsi: scsi-multipath: Add basic ALUA support
Posted by John Garry 3 weeks, 2 days ago
On 14/03/2026 04:48, Benjamin Marzinski wrote:
>> +			ext_hdr_unsupp = true;
>> +			goto retry;
>> +		}
>> +		/*
>> +		 * If the array returns with 'ALUA state transition'
>> +		 * sense code here it cannot return RTPG data during
>> +		 * transition. So set the state to 'transitioning' directly.
>> +		 */
>> +		if (sense_hdr.sense_key == NOT_READY &&
>> +		    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
>> +			goto out;
> This check is odd. First, we don't set the state to 'transitioning' like
> the comment says. We don't set alua_state at all, which ends up meaning
> that it stays as 0 (SCSI_ACCESS_STATE_OPTIMAL). It seems like we should
> explicitly set it and make the comment reflect that, if just to aid
> understanding of the logic.

Yeah, I have to admit that this is all bodged the a bit, as 
transitioning is not properly handled. Instead of "basic" ALUA support, 
this really is limited ALUA support.

I am now looking at a way to have a core scsi ALUA driver to handle all 
of this, but it is challenging as we need to continue ALUA DH support.

> 
> Nitpick: Also, this is the only place where we goto out. All the other
> checks individually free the buffer and return directly. I realize that
> all the other checks that exit early are errors, but it seems like we
> could just return a variable at the end of the function.

Sure, I can pay attention to this. However, as mentioned above, I am 
experimenting with moving any ALUA specifics into scsi core code.

> 
>> +
>> +		/*
>> +		 * Retry on any other UNIT ATTENTION occurred.
>> +		 */
>> +		if (sense_hdr.sense_key == UNIT_ATTENTION) {
>> +			scsi_print_sense_hdr(sdev, __func__, &sense_hdr);
>> +			kfree(buff);
>> +			return -EAGAIN;
>> +		}
> If we get a UNIT ATTENTION, we end up failing scsi_mpath_dev_alloc(),
> not retrying. Aside from the comment being wrong, it seems very brittle
> to fail here, just because we got a UNIT ATTENTION.

Ack

Thanks,
John
Re: [PATCH 5/8] scsi: scsi-multipath: Add basic ALUA support
Posted by Hannes Reinecke 4 weeks, 1 day ago
On 3/10/26 12:49, John Garry wrote:
> Add basic support just to get the per-port group state.
> 
> This support does not account of state transitioning, sdev port group
> reconfiguration, etc, required for full support.
> 
> libmultipath callbacks scsi_mpath_is_optimized() and
> scsi_mpath_is_disabled() are updated to take account of the ALUA-provided
> path information.
> 
> As before, for no ALUA support (and scsi_multipath_always on) we assume
> that the paths are all optimized.
> 
> Much of this code in scsi_mpath_alua_init() is copied from scsi_dh_alua.c,
> originally authored by Hannes Reinecke.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>   drivers/scsi/scsi_multipath.c | 163 ++++++++++++++++++++++++++++++++--
>   include/scsi/scsi_multipath.h |   3 +
>   2 files changed, 160 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
> index 1489c7e979167..0a314080bf0a5 100644
> --- a/drivers/scsi/scsi_multipath.c
> +++ b/drivers/scsi/scsi_multipath.c
> @@ -4,6 +4,7 @@
>    *
>    */
>   
> +#include <scsi/scsi_alua.h>
>   #include <scsi/scsi_cmnd.h>
>   #include <scsi/scsi_driver.h>
>   #include <scsi/scsi_proto.h>
> @@ -346,18 +347,29 @@ static bool scsi_mpath_is_disabled(struct mpath_device *mpath_device)
>   				to_scsi_mpath_device(mpath_device);
>   	struct scsi_device *sdev = scsi_mpath_dev->sdev;
>   	enum scsi_device_state sdev_state = sdev->sdev_state;
> +	int alua_state = scsi_mpath_dev->alua_state;
>   
>   	if (sdev_state == SDEV_RUNNING || sdev_state == SDEV_CANCEL)
>   		return false;
>   
> -	return true;
> +	if (alua_state == SCSI_ACCESS_STATE_OPTIMAL ||
> +	    alua_state == SCSI_ACCESS_STATE_ACTIVE)
> +		return true;
> +
> +	return false;
>   }
>   
>   static bool scsi_mpath_is_optimized(struct mpath_device *mpath_device)
>   {
> +	struct scsi_mpath_device *scsi_mpath_dev =
> +				to_scsi_mpath_device(mpath_device);
> +
>   	if (scsi_mpath_is_disabled(mpath_device))
>   		return false;
> -	return true;
> +	if (scsi_mpath_dev->alua_state == SCSI_ACCESS_STATE_OPTIMAL)
> +		return true;
> +	return false;
> +
>   }
>   
>   /* Until we have ALUA support, we're always optimised */
> @@ -366,7 +378,7 @@ static enum mpath_access_state scsi_mpath_get_access_state(
>   {
>   	if (scsi_mpath_is_disabled(mpath_device))
>   		return MPATH_STATE_INVALID;
> -	return MPATH_STATE_OPTIMIZED;
> +	return scsi_mpath_is_optimized(mpath_device);
>   }
>   
>   static bool scsi_mpath_available_path(struct mpath_device *mpath_device, bool *available)
> @@ -579,16 +591,147 @@ static void scsi_multipath_sdev_uninit(struct scsi_device *sdev)
>   	sdev->scsi_mpath_dev = NULL;
>   }
>   
> +static int scsi_mpath_alua_init(struct scsi_device *sdev)
> +{
> +	struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
> +	struct scsi_sense_hdr sense_hdr;
> +	int len, k, off, bufflen = ALUA_RTPG_SIZE;
> +	unsigned char *desc, *buff;
> +	unsigned int tpg_desc_tbl_off;
> +	int group_id, rel_port = -1;
> +	bool ext_hdr_unsupp = false;
> +	int ret;
> +
> +	group_id = scsi_vpd_tpg_id(sdev, &rel_port);
> +	if (group_id < 0) {
> +		/*
> +		 * Internal error; TPGS supported but required
> +		 * VPD identification descriptors not present.
> +		 * Disable ALUA support.
> +		 */
> +		sdev_printk(KERN_INFO, sdev,
> +			    "%s: No target port descriptors found\n",
> +			    __func__);
> +		return -EIO;
> +	}
> +
> +	buff = kzalloc(bufflen, GFP_KERNEL);
> +	if (!buff)
> +		return -ENOMEM;
> + retry:
> +	ret = submit_rtpg(sdev, buff, bufflen, &sense_hdr,
> +				ext_hdr_unsupp);
> +
> +	if (ret) {
> +		if (ret < 0 || !scsi_sense_valid(&sense_hdr)) {
> +			sdev_printk(KERN_INFO, sdev,
> +				    "%s: rtpg failed, result %d\n",
> +				    __func__, ret);
> +			kfree(buff);
> +			if (ret < 0)
> +				return -EBUSY;
> +			if (host_byte(ret) == DID_NO_CONNECT)
> +				return -ENODEV;
> +			return -EIO;
> +		}
> +
> +		/*
> +		 * submit_rtpg() has failed on existing arrays
> +		 * when requesting extended header info, and
> +		 * the array doesn't support extended headers,
> +		 * even though it shouldn't according to T10.
> +		 * The retry without rtpg_ext_hdr_req set
> +		 * handles this.
> +		 * Note:  some arrays return a sense key of ILLEGAL_REQUEST
> +		 * with ASC 00h if they don't support the extended header.
> +		 */
> +		if (ext_hdr_unsupp &&
> +		    sense_hdr.sense_key == ILLEGAL_REQUEST) {
> +			ext_hdr_unsupp = true;
> +			goto retry;
> +		}
> +		/*
> +		 * If the array returns with 'ALUA state transition'
> +		 * sense code here it cannot return RTPG data during
> +		 * transition. So set the state to 'transitioning' directly.
> +		 */
> +		if (sense_hdr.sense_key == NOT_READY &&
> +		    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
> +			goto out;
> +
> +		/*
> +		 * Retry on any other UNIT ATTENTION occurred.
> +		 */
> +		if (sense_hdr.sense_key == UNIT_ATTENTION) {
> +			scsi_print_sense_hdr(sdev, __func__, &sense_hdr);
> +			kfree(buff);
> +			return -EAGAIN;
> +		}
> +		sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
> +			    __func__);
> +		scsi_print_sense_hdr(sdev, __func__, &sense_hdr);
> +		kfree(buff);
> +		return -EIO;
> +	}
> +
> +	len = get_unaligned_be32(&buff[0]) + 4;
> +
> +	if (len > bufflen) {
> +		/* Resubmit with the correct length */
> +		kfree(buff);
> +		bufflen = len;
> +		buff = kmalloc(bufflen, GFP_KERNEL);
> +		if (!buff) {
> +			/* Temporary failure, bypass */
> +			return -EBUSY;
> +		}
> +		goto retry;
> +	}
> +
> +	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
> +		tpg_desc_tbl_off = 8;
> +	else
> +		tpg_desc_tbl_off = 4;
> +
> +	for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
> +	     k < len;
> +	     k += off, desc += off) {
> +		u16 group_id_found = get_unaligned_be16(&desc[2]);
> +
> +		if (group_id_found == group_id) {
> +			int valid_states, state, pref;
> +
> +			state = desc[0] & 0x0f;
> +			pref = desc[0] >> 7;
> +			valid_states = desc[1];
> +
> +			alua_print_info(sdev, group_id, state, pref, valid_states);
> +
> +			scsi_mpath_dev->alua_state = state;
> +			scsi_mpath_dev->alua_pref = pref;
> +			scsi_mpath_dev->alua_valid_states = valid_states;
> +			goto out;
> +		}
> +
> +		off = 8 + (desc[7] * 4);
> +	}
> +
> +out:
> +	kfree(buff);
> +	return 0;
> +}
> +
>   int scsi_mpath_dev_alloc(struct scsi_device *sdev)
>   {
>   	struct scsi_mpath_head *scsi_mpath_head;
> -	int ret;
> +	int ret, tpgs;
>   
>   	if (!scsi_multipath)
>   		return 0;
>   
> -	if (!scsi_device_tpgs(sdev) && !scsi_multipath_always) {
> -		sdev_printk(KERN_NOTICE, sdev, "tpgs are required for multipath support\n");
> +	tpgs = alua_check_tpgs(sdev);
> +	if (!(tpgs & TPGS_MODE_IMPLICIT) && !scsi_multipath_always) {
> +		sdev_printk(KERN_DEBUG, sdev, "IMPLICIT TPGS are required for multipath support\n");
>   		return 0;
>   	}
>   
> @@ -622,6 +765,14 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev)
>   	sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head;
>   
>   found:
> +	if (tpgs & TPGS_MODE_IMPLICIT) {
> +		ret = scsi_mpath_alua_init(sdev);
> +		if (ret)
> +			goto out_put_head;
> +	} else {
> +		sdev->scsi_mpath_dev->alua_state = SCSI_ACCESS_STATE_OPTIMAL;
> +	}
> +
>   	sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida, GFP_KERNEL);
>   	if (sdev->scsi_mpath_dev->index < 0) {
>   		ret = sdev->scsi_mpath_dev->index;
> diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h
> index 2011447f482d6..7c7ee2fb7def7 100644
> --- a/include/scsi/scsi_multipath.h
> +++ b/include/scsi/scsi_multipath.h
> @@ -38,6 +38,9 @@ struct scsi_mpath_device {
>   	int			index;
>   	atomic_t		nr_active;
>   	struct scsi_mpath_head	*scsi_mpath_head;
> +	int			alua_state;
> +	int			alua_pref;
> +	int			alua_valid_states;
>   
>   	char			device_id_str[SCSI_MPATH_DEVICE_ID_LEN];
>   };

Is there a specific reason why this cannot be in the generic code?
After all, if the device reports anything else than ALUA_STATE_OPTIMAL
or ALUA_STATE_ACTIVE I/O will fail, irrespective of multipath being
active.

I would love to see that in the generic SCSI code, independent on this 
patchset. It would allow us to simplify the device handler code, too,
as then device handler really would only be required for explicit
ALUA. (And could be ignored for scsi-multipathing).

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.com                               +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Re: [PATCH 5/8] scsi: scsi-multipath: Add basic ALUA support
Posted by John Garry 4 weeks, 1 day ago
On 10/03/2026 13:23, Hannes Reinecke wrote:
>>       sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida, 
>> GFP_KERNEL);
>>       if (sdev->scsi_mpath_dev->index < 0) {
>>           ret = sdev->scsi_mpath_dev->index;
>> diff --git a/include/scsi/scsi_multipath.h b/include/scsi/ 
>> scsi_multipath.h
>> index 2011447f482d6..7c7ee2fb7def7 100644
>> --- a/include/scsi/scsi_multipath.h
>> +++ b/include/scsi/scsi_multipath.h
>> @@ -38,6 +38,9 @@ struct scsi_mpath_device {
>>       int            index;
>>       atomic_t        nr_active;
>>       struct scsi_mpath_head    *scsi_mpath_head;
>> +    int            alua_state;
>> +    int            alua_pref;
>> +    int            alua_valid_states;
>>       char            device_id_str[SCSI_MPATH_DEVICE_ID_LEN];
>>   };
> 
> Is there a specific reason why this cannot be in the generic code?

Sure, it's possible....

> After all, if the device reports anything else than ALUA_STATE_OPTIMAL
> or ALUA_STATE_ACTIVE I/O will fail, irrespective of multipath being
> active.
> 
> I would love to see that in the generic SCSI code, independent on this 
> patchset. It would allow us to simplify the device handler code, too,
> as then device handler really would only be required for explicit
> ALUA. (And could be ignored for scsi-multipathing).

Right, so you would like to see alua_port_group management in a core 
ALUA driver as well, right?

If yes, to repeat, it is hard to separate the DH stuff out...but I can 
try. Examples I would need to deal with (and associated handling):

- alua_port_group members like dh_list
- alua_dh_data memebers like init_error
- everything in alua_queue_data

Thanks

Re: [PATCH 5/8] scsi: scsi-multipath: Add basic ALUA support
Posted by Hannes Reinecke 4 weeks, 1 day ago
On 3/10/26 16:52, John Garry wrote:
> On 10/03/2026 13:23, Hannes Reinecke wrote:
>>>       sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida, 
>>> GFP_KERNEL);
>>>       if (sdev->scsi_mpath_dev->index < 0) {
>>>           ret = sdev->scsi_mpath_dev->index;
>>> diff --git a/include/scsi/scsi_multipath.h b/include/scsi/ 
>>> scsi_multipath.h
>>> index 2011447f482d6..7c7ee2fb7def7 100644
>>> --- a/include/scsi/scsi_multipath.h
>>> +++ b/include/scsi/scsi_multipath.h
>>> @@ -38,6 +38,9 @@ struct scsi_mpath_device {
>>>       int            index;
>>>       atomic_t        nr_active;
>>>       struct scsi_mpath_head    *scsi_mpath_head;
>>> +    int            alua_state;
>>> +    int            alua_pref;
>>> +    int            alua_valid_states;
>>>       char            device_id_str[SCSI_MPATH_DEVICE_ID_LEN];
>>>   };
>>
>> Is there a specific reason why this cannot be in the generic code?
> 
> Sure, it's possible....
> 
>> After all, if the device reports anything else than ALUA_STATE_OPTIMAL
>> or ALUA_STATE_ACTIVE I/O will fail, irrespective of multipath being
>> active.
>>
>> I would love to see that in the generic SCSI code, independent on this 
>> patchset. It would allow us to simplify the device handler code, too,
>> as then device handler really would only be required for explicit
>> ALUA. (And could be ignored for scsi-multipathing).
> 
> Right, so you would like to see alua_port_group management in a core 
> ALUA driver as well, right?
> 
> If yes, to repeat, it is hard to separate the DH stuff out...but I can 
> try. Examples I would need to deal with (and associated handling):
> 
> - alua_port_group members like dh_list
> - alua_dh_data memebers like init_error
> - everything in alua_queue_data
> 
While the port group handling looks nice (and there certainly is
a certain neatness to it), it kinda assumes too much about the
internal layout of the hierarchy within the target.
Technically, a target is only required to provide a device
identifier, and a group id (such that you can match with
RTPG output). However, you have no idea which of the various
device IDs are part of the same enclosure; that information
is not required to be present.
So you cannot assume that group ID A reported from device X
is the same group as group ID A reported from device Y.
The only reliable way is to check with the RTPG output, as
that contains all device identifiers for the defined group
IDs.
But: caching RTPG output is problematic (as it'll change
whenever a path state change happens), and it'll need to
contain references to the SCSI devices, introducing all
sorts of locking issues and race conditions.
So probably I would not go down that way (at least initially),
but rather read RTPG during scanning, and set the values
directly in the scsi device.

We then need to re-read that information whenever we hit
a relevant sense code, but arguably we'll need to do that
anyway.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.com                               +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Re: [PATCH 5/8] scsi: scsi-multipath: Add basic ALUA support
Posted by John Garry 4 weeks, 1 day ago
On 10/03/2026 17:54, Hannes Reinecke wrote:
>>
>> If yes, to repeat, it is hard to separate the DH stuff out...but I can 
>> try. Examples I would need to deal with (and associated handling):
>>
>> - alua_port_group members like dh_list
>> - alua_dh_data memebers like init_error
>> - everything in alua_queue_data
>>
> While the port group handling looks nice (and there certainly is
> a certain neatness to it), it kinda assumes too much about the
> internal layout of the hierarchy within the target.
> Technically, a target is only required to provide a device
> identifier, and a group id (such that you can match with
> RTPG output). However, you have no idea which of the various
> device IDs are part of the same enclosure; that information
> is not required to be present.
> So you cannot assume that group ID A reported from device X
> is the same group as group ID A reported from device Y.
> The only reliable way is to check with the RTPG output, as
> that contains all device identifiers for the defined group
> IDs.
> But: caching RTPG output is problematic (as it'll change
> whenever a path state change happens), and it'll need to
> contain references to the SCSI devices, introducing all
> sorts of locking issues and race conditions.
> So probably I would not go down that way (at least initially),
> but rather read RTPG during scanning, and set the values
> directly in the scsi device.
> 
> We then need to re-read that information whenever we hit
> a relevant sense code, but arguably we'll need to do that
> anyway.

Hmmm... what you are describing seems to be now more like what I have in 
this series except only I did not have the ALUA info stored in the 
scsi_device struct.

So if I go down these lines, maybe for sense-triggered updates I can 
have something where scsi-multipath.c or scsi_dh_alua.c can trigger the 
RTPG to be issued and this updates scsi_device ALUA info. It sounds 
simple enough, but probably isn't for scsi_dh_alua.c :)

Thanks!