CXL 3.1 introduced the ability to request that the current on-going
background command be aborted. Add support for this, with the policy
that an abort is requested whenever a new incoming background command
wants to run. As such, everything is left to user discretion, and it
becomes impossible for any one operation to hog the device/mailbox.

The cancellation request runs in the context of the new incoming
command, and will always hold the mbox_mutex, guaranteeing that a
successful cancel cannot race with a third thread coming in and
stealing the effort.

- For Sanitize, the canceling thread will cancel the work and clean up
  on behalf of the respective wq callback, which will never run.
- For the other bg commands, the sleeping thread is kicked and we
  busy-wait until the polling flag is cleared.

In both scenarios, we guarantee that the aborted op's thread is no
longer around, giving the new bg op full authority to submit the
command. Semantics for devices that do not support this functionality
are left unchanged, so the driver behaves correctly either way.

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
drivers/cxl/cxlmem.h | 1 +
drivers/cxl/pci.c | 81 +++++++++++++++++++++++++++++++++++++++-----
2 files changed, 73 insertions(+), 9 deletions(-)
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index b933fb73ef8a..e843ffc3c23a 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -516,6 +516,7 @@ to_cxl_memdev_state(struct cxl_dev_state *cxlds)
enum cxl_opcode {
CXL_MBOX_OP_INVALID = 0x0000,
CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID,
+ CXL_MBOX_OP_REQUEST_ABORT_BG_OP = 0x0005,
CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100,
CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101,
CXL_MBOX_OP_GET_EVT_INT_POLICY = 0x0102,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index f2378604669b..5da50e26e4c4 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -115,8 +115,8 @@ static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
{
u64 reg;
- reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
- return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100;
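+ /*
+ * Check the status register's bg-op busy bit rather than the
+ * percentage: an aborted command may never reach 100%, but the
+ * busy bit is cleared once the operation is over, aborted or not.
+ */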
+ reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
+ return FIELD_GET(CXLDEV_MBOX_STATUS_BG_CMD, reg) == 0;
}
static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
@@ -241,7 +241,8 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox,
* hardware semantics and only allow device health status.
*/
if (mds->security.poll_tmo_secs > 0) {
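+ /*
+ * Also let the abort request itself through, so that an
+ * incoming command can cancel the on-going sanitize.
+ */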
- if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
+ if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO &&
+ mbox_cmd->opcode != CXL_MBOX_OP_REQUEST_ABORT_BG_OP)
return -EBUSY;
}
@@ -335,11 +336,64 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox,
return 0;
}
+/*
+ * Returning true implies that the request was successful and that the
+ * on-going background operation was in fact aborted. This also
+ * guarantees that the respective thread is done.
+ */
+static bool cxl_try_to_cancel_background(struct cxl_mailbox *cxl_mbox)
+{
+ int rc;
+ struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox);
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+ struct device *dev = cxlds->dev;
+ struct cxl_mbox_cmd cmd = {
+ .opcode = CXL_MBOX_OP_REQUEST_ABORT_BG_OP
+ };
+
+ lockdep_assert_held(&cxl_mbox->mbox_mutex);
+
+ rc = __cxl_pci_mbox_send_cmd(cxl_mbox, &cmd);
+ if (rc) {
+ dev_dbg(dev, "Failed to send abort request : %d\n", rc);
+ return false;
+ }
+
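+ /*
+ * The request may not take effect (e.g. the operation is not
+ * abortable) - only treat the cancel as successful once the
+ * device reports no background operation in progress.
+ */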
+ if (!cxl_mbox_background_complete(cxlds))
+ return false;
+
+ if (mds->security.sanitize_active) {
+ /*
+ * Cancel the work and clean up on its behalf - we hold the
+ * mbox_mutex and thus cannot race with cxl_mbox_sanitize_work().
+ */
+ cancel_delayed_work_sync(&mds->security.poll_dwork);
+ mds->security.poll_tmo_secs = 0;
+ if (mds->security.sanitize_node)
+ sysfs_notify_dirent(mds->security.sanitize_node);
+ mds->security.sanitize_active = false;
+
+ dev_dbg(cxlds->dev, "Sanitization operation aborted\n");
+ } else {
+ /*
+ * Kick the poller and wait for it to be done - no one else
+ * can touch the mbox regs. rcuwait_wake_up() provides full
+ * barriers, such that the wake up occurs before we wait on
+ * the poll_bgop atomic to be cleared.
+ */
+ rcuwait_wake_up(&cxl_mbox->mbox_wait);
+ atomic_cond_read_acquire(&cxl_mbox->poll_bgop, !VAL);
+ }
+
+ return true;
+}
+
static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *cmd)
{
int rc;
struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox);
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct device *dev = cxlds->dev;
mutex_lock_io(&cxl_mbox->mbox_mutex);
@@ -348,10 +402,18 @@ static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox,
* each other: no new bg operation can occur in between while polling.
*/
if (cxl_is_background_cmd(cmd->opcode)) {
- if (atomic_read_acquire(&cxl_mbox->poll_bgop)) {
- mutex_unlock(&cxl_mbox->mbox_mutex);
- return -EBUSY;
+ if (mds->security.sanitize_active ||
+ atomic_read_acquire(&cxl_mbox->poll_bgop)) {
+ if (!cxl_try_to_cancel_background(cxl_mbox)) {
+ mutex_unlock(&cxl_mbox->mbox_mutex);
+ return -EBUSY;
+ }
}
+ /*
+ * ... at this point we know that the canceled
+ * bgop context is gone, and we are the _only_
+ * background command in town. Proceed to send it.
+ */
}
rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd);
@@ -394,10 +456,11 @@ static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox,
CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
cmd->return_code = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
bg_status_reg);
- dev_dbg(dev,
- "Mailbox background operation (0x%04x) completed\n",
- cmd->opcode);
+
+ dev_dbg(dev, "Mailbox background operation (0x%04x) %s\n",
+ cmd->opcode, !cmd->return_code ? "completed":"aborted");
done:
+ /* ensure clearing poll_bgop is the last operation */
atomic_set_release(&cxl_mbox->poll_bgop, 0);
}
--
2.46.1