Up until now, we've been aggregating the bandwidth values and only
dividing them by the bus width of the source node. This was completely
wrong, as different nodes on a given path may (and usually do) have
varying bus widths. That in turn, resulted in the calculated clock rates
being completely bogus - usually they ended up being much higher, as
NoC_A<->NoC_B links are very wide.
Since we're not using the aggregate bandwidth value for anything other
than clock rate calculations, remodel qcom_icc_bus_aggregate() to
calculate the per-context clock rate for a given provider, taking into
account the bus width of every individual node.
Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver")
Reported-by: Stephan Gerhold <stephan@gerhold.net>
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
---
drivers/interconnect/qcom/icc-rpm.c | 59 ++++++++++++-------------------------
1 file changed, 19 insertions(+), 40 deletions(-)
diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
index 1508233632f6..d177a76abe2a 100644
--- a/drivers/interconnect/qcom/icc-rpm.c
+++ b/drivers/interconnect/qcom/icc-rpm.c
@@ -293,58 +293,44 @@ static int qcom_icc_bw_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
}
/**
- * qcom_icc_bus_aggregate - aggregate bandwidth by traversing all nodes
+ * qcom_icc_bus_aggregate - calculate bus clock rates by traversing all nodes
* @provider: generic interconnect provider
- * @agg_avg: an array for aggregated average bandwidth of buckets
- * @agg_peak: an array for aggregated peak bandwidth of buckets
- * @max_agg_avg: pointer to max value of aggregated average bandwidth
+ * @agg_clk_rate: array containing the aggregated clock rates in kHz
*/
-static void qcom_icc_bus_aggregate(struct icc_provider *provider,
- u64 *agg_avg, u64 *agg_peak,
- u64 *max_agg_avg)
+static void qcom_icc_bus_aggregate(struct icc_provider *provider, u64 *agg_clk_rate)
{
- struct icc_node *node;
+ u64 agg_avg_rate, agg_rate;
struct qcom_icc_node *qn;
- u64 sum_avg[QCOM_SMD_RPM_STATE_NUM];
+ struct icc_node *node;
int i;
- /* Initialise aggregate values */
- for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
- agg_avg[i] = 0;
- agg_peak[i] = 0;
- }
-
- *max_agg_avg = 0;
-
/*
- * Iterate nodes on the interconnect and aggregate bandwidth
- * requests for every bucket.
+ * Iterate nodes on the provider, aggregate bandwidth requests for
+ * every bucket and convert them into bus clock rates.
*/
list_for_each_entry(node, &provider->nodes, node_list) {
qn = node->data;
for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
if (qn->channels)
- sum_avg[i] = div_u64(qn->sum_avg[i], qn->channels);
+ agg_avg_rate = div_u64(qn->sum_avg[i], qn->channels);
else
- sum_avg[i] = qn->sum_avg[i];
- agg_avg[i] += sum_avg[i];
- agg_peak[i] = max_t(u64, agg_peak[i], qn->max_peak[i]);
+ agg_avg_rate = qn->sum_avg[i];
+
+ agg_rate = max_t(u64, agg_avg_rate, qn->max_peak[i]);
+ do_div(agg_rate, qn->buswidth);
+
+ agg_clk_rate[i] = max_t(u64, agg_clk_rate[i], agg_rate);
}
}
-
- /* Find maximum values across all buckets */
- for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++)
- *max_agg_avg = max_t(u64, *max_agg_avg, agg_avg[i]);
}
static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
{
- struct qcom_icc_provider *qp;
struct qcom_icc_node *src_qn = NULL, *dst_qn = NULL;
+ u64 agg_clk_rate[QCOM_SMD_RPM_STATE_NUM] = { 0 };
struct icc_provider *provider;
+ struct qcom_icc_provider *qp;
u64 active_rate, sleep_rate;
- u64 agg_avg[QCOM_SMD_RPM_STATE_NUM], agg_peak[QCOM_SMD_RPM_STATE_NUM];
- u64 max_agg_avg;
int ret;
src_qn = src->data;
@@ -353,7 +339,9 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
provider = src->provider;
qp = to_qcom_provider(provider);
- qcom_icc_bus_aggregate(provider, agg_avg, agg_peak, &max_agg_avg);
+ qcom_icc_bus_aggregate(provider, agg_clk_rate);
+ active_rate = agg_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE];
+ sleep_rate = agg_clk_rate[QCOM_SMD_RPM_SLEEP_STATE];
ret = qcom_icc_rpm_set(src_qn, src_qn->sum_avg);
if (ret)
@@ -369,15 +357,6 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
if (!qp->bus_clk_desc && !qp->bus_clk)
return 0;
- /* Intentionally keep the rates in kHz as that's what RPM accepts */
- active_rate = max(agg_avg[QCOM_SMD_RPM_ACTIVE_STATE],
- agg_peak[QCOM_SMD_RPM_ACTIVE_STATE]);
- do_div(active_rate, src_qn->buswidth);
-
- sleep_rate = max(agg_avg[QCOM_SMD_RPM_SLEEP_STATE],
- agg_peak[QCOM_SMD_RPM_SLEEP_STATE]);
- do_div(sleep_rate, src_qn->buswidth);
-
/*
* Downstream checks whether the requested rate is zero, but it makes little sense
* to vote for a value that's below the lower threshold, so let's not do so.
--
2.41.0
On Fri, Jun 09, 2023 at 10:19:27PM +0200, Konrad Dybcio wrote:
> Up until now, we've been aggregating the bandwidth values and only
> dividing them by the bus width of the source node. This was completely
> wrong, as different nodes on a given path may (and usually do) have
> varying bus widths. That in turn, resulted in the calculated clock rates
> being completely bogus - usually they ended up being much higher, as
> NoC_A<->NoC_B links are very wide.
>
> Since we're not using the aggregate bandwidth value for anything other
> than clock rate calculations, remodel qcom_icc_bus_aggregate() to
> calculate the per-context clock rate for a given provider, taking into
> account the bus width of every individual node.
>
> Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver")
> Reported-by: Stephan Gerhold <stephan@gerhold.net>
> Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
> ---
> drivers/interconnect/qcom/icc-rpm.c | 59 ++++++++++++-------------------------
> 1 file changed, 19 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
> index 1508233632f6..d177a76abe2a 100644
> --- a/drivers/interconnect/qcom/icc-rpm.c
> +++ b/drivers/interconnect/qcom/icc-rpm.c
> @@ -293,58 +293,44 @@ static int qcom_icc_bw_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
> }
>
> /**
> - * qcom_icc_bus_aggregate - aggregate bandwidth by traversing all nodes
> + * qcom_icc_bus_aggregate - calculate bus clock rates by traversing all nodes
> * @provider: generic interconnect provider
> - * @agg_avg: an array for aggregated average bandwidth of buckets
> - * @agg_peak: an array for aggregated peak bandwidth of buckets
> - * @max_agg_avg: pointer to max value of aggregated average bandwidth
> + * @agg_clk_rate: array containing the aggregated clock rates in kHz
> */
> -static void qcom_icc_bus_aggregate(struct icc_provider *provider,
> - u64 *agg_avg, u64 *agg_peak,
> - u64 *max_agg_avg)
> +static void qcom_icc_bus_aggregate(struct icc_provider *provider, u64 *agg_clk_rate)
> {
> - struct icc_node *node;
> + u64 agg_avg_rate, agg_rate;
> struct qcom_icc_node *qn;
> - u64 sum_avg[QCOM_SMD_RPM_STATE_NUM];
> + struct icc_node *node;
> int i;
>
> - /* Initialise aggregate values */
> - for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
> - agg_avg[i] = 0;
> - agg_peak[i] = 0;
> - }
> -
> - *max_agg_avg = 0;
> -
> /*
> - * Iterate nodes on the interconnect and aggregate bandwidth
> - * requests for every bucket.
> + * Iterate nodes on the provider, aggregate bandwidth requests for
> + * every bucket and convert them into bus clock rates.
> */
> list_for_each_entry(node, &provider->nodes, node_list) {
> qn = node->data;
> for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
> if (qn->channels)
> - sum_avg[i] = div_u64(qn->sum_avg[i], qn->channels);
> + agg_avg_rate = div_u64(qn->sum_avg[i], qn->channels);
> else
> - sum_avg[i] = qn->sum_avg[i];
> - agg_avg[i] += sum_avg[i];
> - agg_peak[i] = max_t(u64, agg_peak[i], qn->max_peak[i]);
> + agg_avg_rate = qn->sum_avg[i];
> +
> + agg_rate = max_t(u64, agg_avg_rate, qn->max_peak[i]);
> + do_div(agg_rate, qn->buswidth);
> +
> + agg_clk_rate[i] = max_t(u64, agg_clk_rate[i], agg_rate);
> }
> }
> -
> - /* Find maximum values across all buckets */
> - for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++)
> - *max_agg_avg = max_t(u64, *max_agg_avg, agg_avg[i]);
> }
>
> static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
> {
> - struct qcom_icc_provider *qp;
> struct qcom_icc_node *src_qn = NULL, *dst_qn = NULL;
> + u64 agg_clk_rate[QCOM_SMD_RPM_STATE_NUM] = { 0 };
> struct icc_provider *provider;
> + struct qcom_icc_provider *qp;
> u64 active_rate, sleep_rate;
> - u64 agg_avg[QCOM_SMD_RPM_STATE_NUM], agg_peak[QCOM_SMD_RPM_STATE_NUM];
> - u64 max_agg_avg;
> int ret;
>
> src_qn = src->data;
> @@ -353,7 +339,9 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
> provider = src->provider;
> qp = to_qcom_provider(provider);
>
> - qcom_icc_bus_aggregate(provider, agg_avg, agg_peak, &max_agg_avg);
> + qcom_icc_bus_aggregate(provider, agg_clk_rate);
> + active_rate = agg_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE];
> + sleep_rate = agg_clk_rate[QCOM_SMD_RPM_SLEEP_STATE];
>
> ret = qcom_icc_rpm_set(src_qn, src_qn->sum_avg);
> if (ret)
> @@ -369,15 +357,6 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
> if (!qp->bus_clk_desc && !qp->bus_clk)
> return 0;
>
> - /* Intentionally keep the rates in kHz as that's what RPM accepts */
I kind of liked this comment because otherwise it's not obvious why
you're not converting from "ICC units" anywhere.
Anyway:
Reviewed-by: Stephan Gerhold <stephan@gerhold.net>
Thanks for going through the giant maze to fix this!
Stephan
On 10.06.2023 21:06, Stephan Gerhold wrote:
> On Fri, Jun 09, 2023 at 10:19:27PM +0200, Konrad Dybcio wrote:
>> Up until now, we've been aggregating the bandwidth values and only
>> dividing them by the bus width of the source node. This was completely
>> wrong, as different nodes on a given path may (and usually do) have
>> varying bus widths. That in turn, resulted in the calculated clock rates
>> being completely bogus - usually they ended up being much higher, as
>> NoC_A<->NoC_B links are very wide.
>>
>> Since we're not using the aggregate bandwidth value for anything other
>> than clock rate calculations, remodel qcom_icc_bus_aggregate() to
>> calculate the per-context clock rate for a given provider, taking into
>> account the bus width of every individual node.
>>
>> Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver")
>> Reported-by: Stephan Gerhold <stephan@gerhold.net>
>> Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
>> ---
>> drivers/interconnect/qcom/icc-rpm.c | 59 ++++++++++++-------------------------
>> 1 file changed, 19 insertions(+), 40 deletions(-)
>>
>> diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
>> index 1508233632f6..d177a76abe2a 100644
>> --- a/drivers/interconnect/qcom/icc-rpm.c
>> +++ b/drivers/interconnect/qcom/icc-rpm.c
>> @@ -293,58 +293,44 @@ static int qcom_icc_bw_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
>> }
>>
>> /**
>> - * qcom_icc_bus_aggregate - aggregate bandwidth by traversing all nodes
>> + * qcom_icc_bus_aggregate - calculate bus clock rates by traversing all nodes
>> * @provider: generic interconnect provider
>> - * @agg_avg: an array for aggregated average bandwidth of buckets
>> - * @agg_peak: an array for aggregated peak bandwidth of buckets
>> - * @max_agg_avg: pointer to max value of aggregated average bandwidth
>> + * @agg_clk_rate: array containing the aggregated clock rates in kHz
>> */
>> -static void qcom_icc_bus_aggregate(struct icc_provider *provider,
>> - u64 *agg_avg, u64 *agg_peak,
>> - u64 *max_agg_avg)
>> +static void qcom_icc_bus_aggregate(struct icc_provider *provider, u64 *agg_clk_rate)
>> {
>> - struct icc_node *node;
>> + u64 agg_avg_rate, agg_rate;
>> struct qcom_icc_node *qn;
>> - u64 sum_avg[QCOM_SMD_RPM_STATE_NUM];
>> + struct icc_node *node;
>> int i;
>>
>> - /* Initialise aggregate values */
>> - for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
>> - agg_avg[i] = 0;
>> - agg_peak[i] = 0;
>> - }
>> -
>> - *max_agg_avg = 0;
>> -
>> /*
>> - * Iterate nodes on the interconnect and aggregate bandwidth
>> - * requests for every bucket.
>> + * Iterate nodes on the provider, aggregate bandwidth requests for
>> + * every bucket and convert them into bus clock rates.
>> */
>> list_for_each_entry(node, &provider->nodes, node_list) {
>> qn = node->data;
>> for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
>> if (qn->channels)
>> - sum_avg[i] = div_u64(qn->sum_avg[i], qn->channels);
>> + agg_avg_rate = div_u64(qn->sum_avg[i], qn->channels);
>> else
>> - sum_avg[i] = qn->sum_avg[i];
>> - agg_avg[i] += sum_avg[i];
>> - agg_peak[i] = max_t(u64, agg_peak[i], qn->max_peak[i]);
>> + agg_avg_rate = qn->sum_avg[i];
>> +
>> + agg_rate = max_t(u64, agg_avg_rate, qn->max_peak[i]);
>> + do_div(agg_rate, qn->buswidth);
>> +
>> + agg_clk_rate[i] = max_t(u64, agg_clk_rate[i], agg_rate);
>> }
>> }
>> -
>> - /* Find maximum values across all buckets */
>> - for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++)
>> - *max_agg_avg = max_t(u64, *max_agg_avg, agg_avg[i]);
>> }
>>
>> static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>> {
>> - struct qcom_icc_provider *qp;
>> struct qcom_icc_node *src_qn = NULL, *dst_qn = NULL;
>> + u64 agg_clk_rate[QCOM_SMD_RPM_STATE_NUM] = { 0 };
>> struct icc_provider *provider;
>> + struct qcom_icc_provider *qp;
>> u64 active_rate, sleep_rate;
>> - u64 agg_avg[QCOM_SMD_RPM_STATE_NUM], agg_peak[QCOM_SMD_RPM_STATE_NUM];
>> - u64 max_agg_avg;
>> int ret;
>>
>> src_qn = src->data;
>> @@ -353,7 +339,9 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>> provider = src->provider;
>> qp = to_qcom_provider(provider);
>>
>> - qcom_icc_bus_aggregate(provider, agg_avg, agg_peak, &max_agg_avg);
>> + qcom_icc_bus_aggregate(provider, agg_clk_rate);
>> + active_rate = agg_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE];
>> + sleep_rate = agg_clk_rate[QCOM_SMD_RPM_SLEEP_STATE];
>>
>> ret = qcom_icc_rpm_set(src_qn, src_qn->sum_avg);
>> if (ret)
>> @@ -369,15 +357,6 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>> if (!qp->bus_clk_desc && !qp->bus_clk)
>> return 0;
>>
>> - /* Intentionally keep the rates in kHz as that's what RPM accepts */
>
> I kind of liked this comment because otherwise it's not obvious why
> you're not converting from "ICC units" anywhere.
I figured the kerneldoc change would be enough:
@agg_clk_rate: array containing the aggregated clock rates in kHz
>
> Anyway:
>
> Reviewed-by: Stephan Gerhold <stephan@gerhold.net>
>
> Thanks for going through the giant maze to fix this!
Thanks for being there along the way.. This spaghetti is far too much
for a single human..
Konrad
>
> Stephan
© 2016 - 2026 Red Hat, Inc.