[v3] Add Per-transport SCMI debug statistics

[PATCH v3 3/5] firmware: arm_scmi: Track basic SCMI statistics

Posted by Luke Parkin 1 year, 6 months ago

Add tracking of initial statistics

Signed-off-by: Luke Parkin <luke.parkin@arm.com>
---
V2->V3
Add more statistics
Use new log_stats method.
V1->V2
Drop unneccesary atomic_set's
Use new 'scmi_log_stats' to simplify incrementing of atomics
Move scmi_log_stats to locations which mean no extra conditionals
	are needed
---
 drivers/firmware/arm_scmi/driver.c | 39 ++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 6edec6ec912d..b22f104cda36 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -126,6 +126,20 @@ struct scmi_debug_info {
 };
 
 enum debug_stat_counters {
+	SENT_OK,
+	SENT_FAIL,
+	SENT_FAIL_POLLING_UNSUPPORTED,
+	SENT_FAIL_CHANNEL_NOT_FOUND,
+	RESPONSE_OK,
+	NOTIF_OK,
+	DLYD_RESPONSE_OK,
+	XFERS_RESPONSE_TIMEOUT,
+	XFERS_RESPONSE_POLLED_TIMEOUT,
+	RESPONSE_POLLED_OK,
+	ERR_MSG_UNEXPECTED,
+	ERR_MSG_INVALID,
+	ERR_MSG_NOMEM,
+	ERR_PROTOCOL,
 	LAST
 };
 
@@ -994,6 +1008,7 @@ scmi_xfer_command_acquire(struct scmi_chan_info *cinfo, u32 msg_hdr)
 		spin_unlock_irqrestore(&minfo->xfer_lock, flags);
 
 		scmi_bad_message_trace(cinfo, msg_hdr, MSG_UNEXPECTED);
+		scmi_log_stats(info->dbg_stats, ERR_MSG_UNEXPECTED);
 
 		return xfer;
 	}
@@ -1021,6 +1036,8 @@ scmi_xfer_command_acquire(struct scmi_chan_info *cinfo, u32 msg_hdr)
 			msg_type, xfer_id, msg_hdr, xfer->state);
 
 		scmi_bad_message_trace(cinfo, msg_hdr, MSG_INVALID);
+		scmi_log_stats(info->dbg_stats, ERR_MSG_INVALID);
+
 
 		/* On error the refcount incremented above has to be dropped */
 		__scmi_xfer_put(minfo, xfer);
@@ -1060,6 +1077,7 @@ static void scmi_handle_notification(struct scmi_chan_info *cinfo,
 			PTR_ERR(xfer));
 
 		scmi_bad_message_trace(cinfo, msg_hdr, MSG_NOMEM);
+		scmi_log_stats(info->dbg_stats, ERR_MSG_NOMEM);
 
 		scmi_clear_channel(info, cinfo);
 		return;
@@ -1075,6 +1093,7 @@ static void scmi_handle_notification(struct scmi_chan_info *cinfo,
 	trace_scmi_msg_dump(info->id, cinfo->id, xfer->hdr.protocol_id,
 			    xfer->hdr.id, "NOTI", xfer->hdr.seq,
 			    xfer->hdr.status, xfer->rx.buf, xfer->rx.len);
+	scmi_log_stats(info->dbg_stats, NOTIF_OK);
 
 	scmi_notify(cinfo->handle, xfer->hdr.protocol_id,
 		    xfer->hdr.id, xfer->rx.buf, xfer->rx.len, ts);
@@ -1134,8 +1153,10 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo,
 	if (xfer->hdr.type == MSG_TYPE_DELAYED_RESP) {
 		scmi_clear_channel(info, cinfo);
 		complete(xfer->async_done);
+		scmi_log_stats(info->dbg_stats, DLYD_RESPONSE_OK);
 	} else {
 		complete(&xfer->done);
+		scmi_log_stats(info->dbg_stats, RESPONSE_OK);
 	}
 
 	if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) {
@@ -1219,6 +1240,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc,
 			       struct scmi_xfer *xfer, unsigned int timeout_ms)
 {
 	int ret = 0;
+	struct scmi_info *info = handle_to_scmi_info(cinfo->handle);
 
 	if (xfer->hdr.poll_completion) {
 		/*
@@ -1239,13 +1261,12 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc,
 					"timed out in resp(caller: %pS) - polling\n",
 					(void *)_RET_IP_);
 				ret = -ETIMEDOUT;
+				scmi_log_stats(info->dbg_stats, XFERS_RESPONSE_POLLED_TIMEOUT);
 			}
 		}
 
 		if (!ret) {
 			unsigned long flags;
-			struct scmi_info *info =
-				handle_to_scmi_info(cinfo->handle);
 
 			/*
 			 * Do not fetch_response if an out-of-order delayed
@@ -1265,6 +1286,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc,
 					    "RESP" : "resp",
 					    xfer->hdr.seq, xfer->hdr.status,
 					    xfer->rx.buf, xfer->rx.len);
+			scmi_log_stats(info->dbg_stats, RESPONSE_POLLED_OK);
 
 			if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) {
 				scmi_raw_message_report(info->raw, xfer,
@@ -1279,6 +1301,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc,
 			dev_err(dev, "timed out in resp(caller: %pS)\n",
 				(void *)_RET_IP_);
 			ret = -ETIMEDOUT;
+			scmi_log_stats(info->dbg_stats, XFERS_RESPONSE_TIMEOUT);
 		}
 	}
 
@@ -1362,13 +1385,15 @@ static int do_xfer(const struct scmi_protocol_handle *ph,
 	    !is_transport_polling_capable(info->desc)) {
 		dev_warn_once(dev,
 			      "Polling mode is not supported by transport.\n");
+		scmi_log_stats(info->dbg_stats, SENT_FAIL_POLLING_UNSUPPORTED);
 		return -EINVAL;
 	}
 
 	cinfo = idr_find(&info->tx_idr, pi->proto->id);
-	if (unlikely(!cinfo))
+	if (unlikely(!cinfo)) {
+		scmi_log_stats(info->dbg_stats, SENT_FAIL_CHANNEL_NOT_FOUND);
 		return -EINVAL;
-
+	}
 	/* True ONLY if also supported by transport. */
 	if (is_polling_enabled(cinfo, info->desc))
 		xfer->hdr.poll_completion = true;
@@ -1400,16 +1425,20 @@ static int do_xfer(const struct scmi_protocol_handle *ph,
 	ret = info->desc->ops->send_message(cinfo, xfer);
 	if (ret < 0) {
 		dev_dbg(dev, "Failed to send message %d\n", ret);
+		scmi_log_stats(info->dbg_stats, SENT_FAIL);
 		return ret;
 	}
 
 	trace_scmi_msg_dump(info->id, cinfo->id, xfer->hdr.protocol_id,
 			    xfer->hdr.id, "CMND", xfer->hdr.seq,
 			    xfer->hdr.status, xfer->tx.buf, xfer->tx.len);
+	scmi_log_stats(info->dbg_stats, SENT_OK);
 
 	ret = scmi_wait_for_message_response(cinfo, xfer);
-	if (!ret && xfer->hdr.status)
+	if (!ret && xfer->hdr.status) {
 		ret = scmi_to_linux_errno(xfer->hdr.status);
+		scmi_log_stats(info->dbg_stats, ERR_PROTOCOL);
+	}
 
 	if (info->desc->ops->mark_txdone)
 		info->desc->ops->mark_txdone(cinfo, ret, xfer);
-- 
2.34.1

Re: [PATCH v3 3/5] firmware: arm_scmi: Track basic SCMI statistics

Posted by Cristian Marussi 1 year, 6 months ago

On Mon, Jul 15, 2024 at 02:37:49PM +0100, Luke Parkin wrote:
> Add tracking of initial statistics
> 
> Signed-off-by: Luke Parkin <luke.parkin@arm.com>
> ---
> V2->V3
> Add more statistics
> Use new log_stats method.
> V1->V2
> Drop unneccesary atomic_set's
> Use new 'scmi_log_stats' to simplify incrementing of atomics
> Move scmi_log_stats to locations which mean no extra conditionals
> 	are needed
> ---
>  drivers/firmware/arm_scmi/driver.c | 39 ++++++++++++++++++++++++++----
>  1 file changed, 34 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
> index 6edec6ec912d..b22f104cda36 100644
> --- a/drivers/firmware/arm_scmi/driver.c
> +++ b/drivers/firmware/arm_scmi/driver.c
> @@ -126,6 +126,20 @@ struct scmi_debug_info {
>  };
>  
>  enum debug_stat_counters {
> +	SENT_OK,
> +	SENT_FAIL,
> +	SENT_FAIL_POLLING_UNSUPPORTED,
> +	SENT_FAIL_CHANNEL_NOT_FOUND,
> +	RESPONSE_OK,
> +	NOTIF_OK,
> +	DLYD_RESPONSE_OK,
> +	XFERS_RESPONSE_TIMEOUT,
> +	XFERS_RESPONSE_POLLED_TIMEOUT,
> +	RESPONSE_POLLED_OK,
> +	ERR_MSG_UNEXPECTED,
> +	ERR_MSG_INVALID,
> +	ERR_MSG_NOMEM,
> +	ERR_PROTOCOL,
>  	LAST
>  };
>  
> @@ -994,6 +1008,7 @@ scmi_xfer_command_acquire(struct scmi_chan_info *cinfo, u32 msg_hdr)
>  		spin_unlock_irqrestore(&minfo->xfer_lock, flags);
>  
>  		scmi_bad_message_trace(cinfo, msg_hdr, MSG_UNEXPECTED);
> +		scmi_log_stats(info->dbg_stats, ERR_MSG_UNEXPECTED);

I'd be tempted to say why dont you wrap these scmi_log_stats() inside the
scmi_bad_message_trace() ... BUT in order to avoid an additional
conditional inside the scmi_bad_message_trace() you will need to somehow
remap the MSG_UNEXPECTED to ERR_MSG_UNEXPECTED inside scmi_bad_message_trace (lets say
with a local onstack array indexed by -err)...AND that would mean committing to keep
such mapping in-sync with the the above enum, so as to avoid that adding
a new definition into scmi_bad_msg BUT not to debug_stat_counters will
end up in a buffer overflow....so at the end probably better/safer to keep it
this way...

Thanks,
Cristian

Re: [PATCH v3 3/5] firmware: arm_scmi: Track basic SCMI statistics

Posted by Luke Parkin 1 year, 6 months ago

>> @@ -994,6 +1008,7 @@ scmi_xfer_command_acquire(struct scmi_chan_info *cinfo, u32 msg_hdr)
>>  		spin_unlock_irqrestore(&minfo->xfer_lock, flags);
>>  
>>  		scmi_bad_message_trace(cinfo, msg_hdr, MSG_UNEXPECTED);
>> +		scmi_log_stats(info->dbg_stats, ERR_MSG_UNEXPECTED);
> 
> I'd be tempted to say why dont you wrap these scmi_log_stats() inside the
> scmi_bad_message_trace() ... BUT in order to avoid an additional
> conditional inside the scmi_bad_message_trace() you will need to somehow
> remap the MSG_UNEXPECTED to ERR_MSG_UNEXPECTED inside scmi_bad_message_trace (lets say
> with a local onstack array indexed by -err)...AND that would mean committing to keep
> such mapping in-sync with the the above enum, so as to avoid that adding
> a new definition into scmi_bad_msg BUT not to debug_stat_counters will
> end up in a buffer overflow....so at the end probably better/safer to keep it
> this way...

Yeah, I had the same thoughts and came to the same conclusion. As far as I could
determine there weren't enough scmi_bad_message_trace()'s to warrant the extra
complexity (both code and processing) of centralising it.

Thanks,
Luke

[PATCH v3 1/5] firmware: arm_scmi: Remove superfluous handle_to_scmi_info
[PATCH v3 2/5] firmware: arm_scmi: Add support for tracking statistics
[PATCH v3 3/5] firmware: arm_scmi: Track basic SCMI statistics
[PATCH v3 4/5] firmware: arm_scmi: Create debugfs files for statistics
[PATCH v3 5/5] firmware: arm_scmi: Reset statistics