From: Aaron Kling <webgeek1234@gmail.com>
This adds support for dynamic frequency scaling of external memory on
devices with bpmp firmware that does not support bwmgr.
Signed-off-by: Aaron Kling <webgeek1234@gmail.com>
---
drivers/memory/tegra/tegra186-emc.c | 132 +++++++++++++++++++++++++++++++++++-
1 file changed, 130 insertions(+), 2 deletions(-)
diff --git a/drivers/memory/tegra/tegra186-emc.c b/drivers/memory/tegra/tegra186-emc.c
index 9959ad5804b444b269456d1fbae87b4bc111661b..74be09968baa7a0fbdce4359f470ce56b18acb10 100644
--- a/drivers/memory/tegra/tegra186-emc.c
+++ b/drivers/memory/tegra/tegra186-emc.c
@@ -18,6 +18,17 @@ struct tegra186_emc_dvfs {
unsigned long rate;
};
+enum emc_rate_request_type {
+ EMC_RATE_DEBUG,
+ EMC_RATE_ICC,
+ EMC_RATE_TYPE_MAX,
+};
+
+struct emc_rate_request {
+ unsigned long min_rate;
+ unsigned long max_rate;
+};
+
struct tegra186_emc {
struct tegra_bpmp *bpmp;
struct device *dev;
@@ -33,8 +44,90 @@ struct tegra186_emc {
} debugfs;
struct icc_provider provider;
+
+ /*
+ * There are multiple sources in the EMC driver which could request
+ * a min/max clock rate; these rates are contained in this array.
+ */
+ struct emc_rate_request requested_rate[EMC_RATE_TYPE_MAX];
+
+ /* protect shared rate-change code path */
+ struct mutex rate_lock;
};
+static void tegra186_emc_rate_requests_init(struct tegra186_emc *emc)
+{
+ unsigned int i;
+
+ for (i = 0; i < EMC_RATE_TYPE_MAX; i++) {
+ emc->requested_rate[i].min_rate = 0;
+ emc->requested_rate[i].max_rate = ULONG_MAX;
+ }
+}
+
+static int emc_request_rate(struct tegra186_emc *emc,
+ unsigned long new_min_rate,
+ unsigned long new_max_rate,
+ enum emc_rate_request_type type)
+{
+ struct emc_rate_request *req = emc->requested_rate;
+ unsigned long min_rate = 0, max_rate = ULONG_MAX;
+ unsigned int i;
+ int err;
+
+ /* select minimum and maximum rates among the requested rates */
+ for (i = 0; i < EMC_RATE_TYPE_MAX; i++, req++) {
+ if (i == type) {
+ min_rate = max(new_min_rate, min_rate);
+ max_rate = min(new_max_rate, max_rate);
+ } else {
+ min_rate = max(req->min_rate, min_rate);
+ max_rate = min(req->max_rate, max_rate);
+ }
+ }
+
+ if (min_rate > max_rate) {
+ dev_err_ratelimited(emc->dev, "%s: type %u: out of range: %lu %lu\n",
+ __func__, type, min_rate, max_rate);
+ return -ERANGE;
+ }
+
+ err = clk_set_rate(emc->clk, min_rate);
+ if (err)
+ return err;
+
+ emc->requested_rate[type].min_rate = new_min_rate;
+ emc->requested_rate[type].max_rate = new_max_rate;
+
+ return 0;
+}
+
+static int emc_set_min_rate(struct tegra186_emc *emc, unsigned long rate,
+ enum emc_rate_request_type type)
+{
+ struct emc_rate_request *req = &emc->requested_rate[type];
+ int ret;
+
+ mutex_lock(&emc->rate_lock);
+ ret = emc_request_rate(emc, rate, req->max_rate, type);
+ mutex_unlock(&emc->rate_lock);
+
+ return ret;
+}
+
+static int emc_set_max_rate(struct tegra186_emc *emc, unsigned long rate,
+ enum emc_rate_request_type type)
+{
+ struct emc_rate_request *req = &emc->requested_rate[type];
+ int ret;
+
+ mutex_lock(&emc->rate_lock);
+ ret = emc_request_rate(emc, req->min_rate, rate, type);
+ mutex_unlock(&emc->rate_lock);
+
+ return ret;
+}
+
/*
* debugfs interface
*
@@ -107,7 +200,7 @@ static int tegra186_emc_debug_min_rate_set(void *data, u64 rate)
if (!tegra186_emc_validate_rate(emc, rate))
return -EINVAL;
- err = clk_set_min_rate(emc->clk, rate);
+ err = emc_set_min_rate(emc, rate, EMC_RATE_DEBUG);
if (err < 0)
return err;
@@ -137,7 +230,7 @@ static int tegra186_emc_debug_max_rate_set(void *data, u64 rate)
if (!tegra186_emc_validate_rate(emc, rate))
return -EINVAL;
- err = clk_set_max_rate(emc->clk, rate);
+ err = emc_set_max_rate(emc, rate, EMC_RATE_DEBUG);
if (err < 0)
return err;
@@ -217,6 +310,12 @@ static int tegra186_emc_get_emc_dvfs_latency(struct tegra186_emc *emc)
return 0;
}
+static inline struct tegra186_emc *
+to_tegra186_emc_provider(struct icc_provider *provider)
+{
+ return container_of(provider, struct tegra186_emc, provider);
+}
+
/*
* tegra186_emc_icc_set_bw() - Set BW api for EMC provider
* @src: ICC node for External Memory Controller (EMC)
@@ -227,6 +326,33 @@ static int tegra186_emc_get_emc_dvfs_latency(struct tegra186_emc *emc)
*/
static int tegra186_emc_icc_set_bw(struct icc_node *src, struct icc_node *dst)
{
+ struct tegra186_emc *emc = to_tegra186_emc_provider(dst->provider);
+ struct tegra_mc *mc = dev_get_drvdata(emc->dev->parent);
+ unsigned long long peak_bw = icc_units_to_bps(dst->peak_bw);
+ unsigned long long avg_bw = icc_units_to_bps(dst->avg_bw);
+ unsigned long long rate = max(avg_bw, peak_bw);
+ const unsigned int ddr = 2;
+ int err;
+
+ /*
+ * Do nothing here if bwmgr is supported in BPMP-FW; in that case BPMP-FW
+ * sets the final frequency based on the passed values.
+ */
+ if (mc->bwmgr_mrq_supported)
+ return 0;
+
+ /*
+ * The Tegra186 EMC runs at the SDRAM bus clock rate. This means the
+ * EMC clock rate is half the peak data rate because data is sampled
+ * on both EMC clock edges.
+ */
+ do_div(rate, ddr);
+ rate = min_t(u64, rate, U32_MAX);
+
+ err = emc_set_min_rate(emc, rate, EMC_RATE_ICC);
+ if (err)
+ return err;
+
return 0;
}
@@ -329,6 +455,8 @@ static int tegra186_emc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, emc);
emc->dev = &pdev->dev;
+ tegra186_emc_rate_requests_init(emc);
+
if (tegra_bpmp_mrq_is_supported(emc->bpmp, MRQ_EMC_DVFS_LATENCY)) {
err = tegra186_emc_get_emc_dvfs_latency(emc);
if (err)
--
2.51.0
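As a quick illustration of how the per-type requests above are meant to combine, here is a minimal standalone sketch with made-up values. It mirrors the aggregation in emc_request_rate() from the patch but is not the driver code itself:

#include <limits.h>
#include <stdio.h>

#define EMC_RATE_TYPE_MAX 2	/* EMC_RATE_DEBUG, EMC_RATE_ICC */

struct emc_rate_request {
	unsigned long min_rate;
	unsigned long max_rate;
};

int main(void)
{
	/* Hypothetical outstanding requests: ICC asks for a 250 MHz floor,
	 * debugfs has not constrained anything yet.
	 */
	struct emc_rate_request req[EMC_RATE_TYPE_MAX] = {
		{ .min_rate = 0,		.max_rate = ULONG_MAX },	/* DEBUG */
		{ .min_rate = 250000000UL,	.max_rate = ULONG_MAX },	/* ICC */
	};
	unsigned long min_rate = 0, max_rate = ULONG_MAX;
	int i;

	for (i = 0; i < EMC_RATE_TYPE_MAX; i++) {
		if (req[i].min_rate > min_rate)
			min_rate = req[i].min_rate;
		if (req[i].max_rate < max_rate)
			max_rate = req[i].max_rate;
	}

	if (min_rate > max_rate) {
		fprintf(stderr, "conflicting requests -> -ERANGE\n");
		return 1;
	}

	/* The driver would now call clk_set_rate(emc->clk, min_rate). */
	printf("EMC floor: %lu Hz\n", min_rate);	/* 250000000 */
	return 0;
}
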
On 27/10/2025 18:55, Aaron Kling via B4 Relay wrote:
> From: Aaron Kling <webgeek1234@gmail.com>
>
> This adds support for dynamic frequency scaling of external memory on
> devices with bpmp firmware that does not support bwmgr.

...

FYI, this patch is causing a boot regression on Tegra194 devices. I
noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
no longer booting and bisect is pointing to this. I will have a closer
look and try to see why this is.
Jon
--
nvpublic
On Mon, Nov 10, 2025 at 3:25 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>
>
> On 27/10/2025 18:55, Aaron Kling via B4 Relay wrote:
> > From: Aaron Kling <webgeek1234@gmail.com>
> >
> > This adds support for dynamic frequency scaling of external memory on
> > devices with bpmp firmware that does not support bwmgr.

...

>
> FYI, this patch is causing a boot regression on Tegra194 devices. I
> noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
> no longer booting and bisect is pointing to this. I will have a closer
> look and try to see why this is.
Interesting. Both were booting for me during my verification, though
my use case involves the dt changes that I don't believe have been
picked up yet. Thought I had explicitly verified without the dt
changes too, though. Since I was asked to do so on this or one of the
other similar series. I will try to check linux-next as-is soon.
Aaron
On Mon, Nov 10, 2025 at 3:55 PM Aaron Kling <webgeek1234@gmail.com> wrote:
>
> On Mon, Nov 10, 2025 at 3:25 PM Jon Hunter <jonathanh@nvidia.com> wrote:
> >
> >
> > On 27/10/2025 18:55, Aaron Kling via B4 Relay wrote:
> > > From: Aaron Kling <webgeek1234@gmail.com>
> > >
> > > This adds support for dynamic frequency scaling of external memory on
> > > devices with bpmp firmware that does not support bwmgr.

...

> >
> > FYI, this patch is causing a boot regression on Tegra194 devices. I
> > noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
> > no longer booting and bisect is pointing to this. I will have a closer
> > look and try to see why this is.
>
> Interesting. Both were booting for me during my verification, though
> my use case involves the dt changes that I don't believe have been
> picked up yet. Thought I had explicitly verified without the dt
> changes too, though. Since I was asked to do so on this or one of the
> other similar series. I will try to check linux-next as-is soon.
I just built next-20251110 using the standard arm64 defconfig and
flashed the resulting Image and dtb's to p2972 and p3518 (p3509+p3668)
and both booted to cli on a barebones busybox ramdisk. I do not see
any errors from tegra-mc, and the only error I see from tegra186-emc
is that it can't find the opp tables, which is expected without the dt
changes, and is not fatal.
Aaron
On 11/11/2025 01:39, Aaron Kling wrote:
> On Mon, Nov 10, 2025 at 3:55 PM Aaron Kling <webgeek1234@gmail.com> wrote:
>>
>> On Mon, Nov 10, 2025 at 3:25 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>>>
>>>
>>> On 27/10/2025 18:55, Aaron Kling via B4 Relay wrote:
>>>> From: Aaron Kling <webgeek1234@gmail.com>
>>>>
>>>> This adds support for dynamic frequency scaling of external memory on
>>>> devices with bpmp firmware that does not support bwmgr.

...

>>>
>>> FYI, this patch is causing a boot regression on Tegra194 devices. I
>>> noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
>>> no longer booting and bisect is pointing to this. I will have a closer
>>> look and try to see why this is.
>>
>> Interesting. Both were booting for me during my verification, though
>> my use case involves the dt changes that I don't believe have been
>> picked up yet. Thought I had explicitly verified without the dt
>> changes too, though. Since I was asked to do so on this or one of the
>> other similar series. I will try to check linux-next as-is soon.
>
> I just built next-20251110 using the standard arm64 defconfig and
> flashed the resulting Image and dtb's to p2972 and p3518 (p3509+p3668)
> and both booted to cli on a barebones busybox ramdisk. I do not see
> any errors from tegra-mc, and the only error I see from tegra186-emc
> is that it can't find the opp tables, which is expected without the dt
> changes, and is not fatal.
Thanks for testing. Something is not right because our boards are
failing. So maybe we are doing/testing something different. However,
this should not break. So there is a problem here.
Jon
--
nvpublic
On 11/11/2025 12:13, Jon Hunter wrote:
>>>>> +
>>>>> if (tegra_bpmp_mrq_is_supported(emc->bpmp, MRQ_EMC_DVFS_LATENCY)) {
>>>>> err = tegra186_emc_get_emc_dvfs_latency(emc);
>>>>> if (err)
>>>>>
>>>>
>>>>
>>>> FYI, this patch is causing a boot regression on Tegra194 devices. I
>>>> noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
>>>> no longer booting and bisect is pointing to this. I will have a closer
>>>> look and try to see why this is.
>>>
>>> Interesting. Both were booting for me during my verification, though
>>> my use case involves the dt changes that I don't believe have been
>>> picked up yet. Thought I had explicitly verified without the dt
>>> changes too, though. Since I was asked to do so on this or one of the
>>> other similar series. I will try to check linux-next as-is soon.
>>
>> I just built next-20251110 using the standard arm64 defconfig and
>> flashed the resulting Image and dtb's to p2972 and p3518 (p3509+p3668)
>> and both booted to cli on a barebones busybox ramdisk. I do not see
>> any errors from tegra-mc, and the only error I see from tegra186-emc
>> is that it can't find the opp tables, which is expected without the dt
>> changes, and is not fatal.
>
> Thanks for testing. Something is not right because our boards are
> failing. So may be we are doing/testing something different. However,
> this should not break. So there is a problem here.
Did you mean: "So there is NO problem here"?

I kept these for 10 days in linux-next and yesterday sent them in a pull
request. If some patches are needed on top, they can still fit the coming
merge window if sent soon.
Best regards,
Krzysztof
On 11/11/2025 11:16, Krzysztof Kozlowski wrote:
> On 11/11/2025 12:13, Jon Hunter wrote:
>>>>>> +
>>>>>> if (tegra_bpmp_mrq_is_supported(emc->bpmp, MRQ_EMC_DVFS_LATENCY)) {
>>>>>> err = tegra186_emc_get_emc_dvfs_latency(emc);
>>>>>> if (err)
>>>>>>
>>>>>
>>>>>
>>>>> FYI, this patch is causing a boot regression on Tegra194 devices. I
>>>>> noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
>>>>> no longer booting and bisect is pointing to this. I will have a closer
>>>>> look and try to see why this is.
>>>>
>>>> Interesting. Both were booting for me during my verification, though
>>>> my use case involves the dt changes that I don't believe have been
>>>> picked up yet. Thought I had explicitly verified without the dt
>>>> changes too, though. Since I was asked to do so on this or one of the
>>>> other similar series. I will try to check linux-next as-is soon.
>>>
>>> I just built next-20251110 using the standard arm64 defconfig and
>>> flashed the resulting Image and dtb's to p2972 and p3518 (p3509+p3668)
>>> and both booted to cli on a barebones busybox ramdisk. I do not see
>>> any errors from tegra-mc, and the only error I see from tegra186-emc
>>> is that it can't find the opp tables, which is expected without the dt
>>> changes, and is not fatal.
>>
>> Thanks for testing. Something is not right because our boards are
>> failing. So may be we are doing/testing something different. However,
>> this should not break. So there is a problem here.
>
>
> Did you meant: "So there is NO problem here"?
Nope. I mean that this is a problem here.
> I kept these for 10 days in linux-next and yesterday sent them in pull
> request. If some patches are needed on top, they can still fit coming
> merge window if sent soon.
Looking back I see it started failing with next-20251103. next-20251031
was fine. Reverting this commit on top of next-20251110 fixes the issue.
There may be a difference in the firmware being used. Our testing is
based upon an older NVIDIA L4T r32.5.1 release but nonetheless, we
should not break that.
Jon
--
nvpublic
On 11/11/2025 12:05, Jon Hunter wrote:

...

>>> Thanks for testing. Something is not right because our boards are
>>> failing. So may be we are doing/testing something different. However,
>>> this should not break. So there is a problem here.
>>
>>
>> Did you meant: "So there is NO problem here"?
>
> Nope. I mean that this is a problem here.
>
>> I kept these for 10 days in linux-next and yesterday sent them in pull
>> request. If some patches are needed on top, they can still fit coming
>> merge window if sent soon.
>
> Looking back I see it started failing with next-20251103. next-20251031
> was fine. Reverting this commit on top of next-20251110 fixes the issue.
>
> There may be a difference in the firmware being used. Our testing is
> based upon an older NVIDIA L4T r32.5.1 release but nonetheless, we
> should not break that.

OK, so I see what is happening here. The boot test that we are running
has a 2 minute timeout and the board is now failing to boot within that
time.

Adding some debug prints, I can see that initially the EMC clock
frequency is 1600MHz and now, after this change, on boot the EMC clock
gets set to 250MHz. Hence, booting now takes significantly longer and
the test times out.

We definitely don't want to increase the timeout of the test. Any thoughts?

Jon

--
nvpublic
On Tue, Nov 11, 2025 at 8:35 AM Jon Hunter <jonathanh@nvidia.com> wrote:
>
> On 11/11/2025 12:05, Jon Hunter wrote:
>
> ...
>
> OK, so I see what is happening here. The boot test that we are running
> has a 2 minute timeout and the board is now failing to boot within that
> time.
>
> Adding some debug prints, I can see that initially the EMC clock
> frequency is 1600MHz and now after this change, on boot the EMC clock
> get set to 250MHz. Hence, the booting is now taking significantly longer
> and the test times out.
>
> We definitely don't want to increase the timeout of the test. Any thoughts?

My setup uses the boot stack from L4T r32.7.6, though cboot is source
built and has had changes over time to support newer Android versions.
There shouldn't be anything there that would affect emc clock, though.

I'm seeing the emc clock stay at the boot value, namely 1600MHz. Per
both debugfs clk/emc/clk_rate and bpmp/debug/clk/emc/rate. I don't
even see 250MHz as an option. Debugfs emc/available_rates lists 204MHz
as the closest entry.

I'm trying to think what could cause a drop in the selected clock
rate. This patch should only dynamically change the rate if the opp
tables exist, enabling the cpufreq based scaling via icc. But those
tables don't exist on linux-next right now. My test ramdisk does
nothing except set up sysfs/procfs/etc just enough to run a busybox
shell for debugging. Do the Nvidia regression testing boot scripts do
anything to sysfs or debugfs that would affect emc?

Aaron
On 11/11/2025 17:04, Aaron Kling wrote:

...

> My setup uses the boot stack from L4T r32.7.6, though cboot is source
> built and has had changes over time to support newer Android versions.
> There shouldn't be anything there that would affect emc clock, though.
>
> I'm seeing the emc clock stay at the boot value, namely 1600MHz. Per
> both debugfs clk/emc/clk_rate and bpmp/debug/clk/emc/rate. I don't
> even see 250MHz as an option. Debugfs emc/available_rates lists 204MHz
> as the closest entry.
>
> I'm trying to think what could cause a drop in the selected clock
> rate. This patch should only dynamically change the rate if the opp
> tables exist, enabling the cpufreq based scaling via icc. But those
> tables don't exist on linux-next right now. My test ramdisk does
> nothing except set up sysfs/procfs/etc just enough to run a busybox
> shell for debugging. Do the Nvidia regression testing boot scripts do
> anything to sysfs or debugfs that would affect emc?

So this is definitely coming from ICC. On boot I see a request for
250MHz coming from the PCIe driver ...

[ 13.861227] tegra186_emc_icc_set_bw-356: rate 250000000
[ 13.861350] CPU: 1 UID: 0 PID: 68 Comm: kworker/u32:1 Not tainted 6.18.0-rc4-next-20251110-00001-gfc12493c80fb-dirty #9 PREEMPT
[ 13.861362] Hardware name: NVIDIA Jetson AGX Xavier Developer Kit (DT)
[ 13.861370] Workqueue: events_unbound deferred_probe_work_func
[ 13.861388] Call trace:
[ 13.861393] show_stack+0x18/0x24 (C)
[ 13.861407] dump_stack_lvl+0x74/0x8c
[ 13.861419] dump_stack+0x18/0x24
[ 13.861426] tegra186_emc_icc_set_bw+0xc8/0x14c
[ 13.861438] apply_constraints+0x70/0xb0
[ 13.861451] icc_set_bw+0x88/0x128
[ 13.861461] tegra_pcie_icc_set+0x7c/0x10c [pcie_tegra194]
[ 13.861499] tegra_pcie_dw_start_link+0x178/0x2b0 [pcie_tegra194]
[ 13.861510] dw_pcie_host_init+0x664/0x6e0
[ 13.861523] tegra_pcie_dw_probe+0x6d4/0xbfc [pcie_tegra194]
[ 13.861534] platform_probe+0x5c/0x98
[ 13.861547] really_probe+0xbc/0x2a8
[ 13.861555] __driver_probe_device+0x78/0x12c
[ 13.861563] driver_probe_device+0x3c/0x15c
[ 13.861572] __device_attach_driver+0xb8/0x134
[ 13.861580] bus_for_each_drv+0x84/0xe0
[ 13.861588] __device_attach+0x9c/0x188
[ 13.861596] device_initial_probe+0x14/0x20
[ 13.861610] bus_probe_device+0xac/0xb0
[ 13.861619] deferred_probe_work_func+0x88/0xc0
[ 13.861627] process_one_work+0x148/0x28c
[ 13.861640] worker_thread+0x2d0/0x3d8
[ 13.861648] kthread+0x128/0x200
[ 13.861659] ret_from_fork+0x10/0x20

The actual rate that is set is 408MHz if I read the rate after
it is set ...

[ 13.912099] tegra186_emc_icc_set_bw-362: rate 408000000

This is a simple boot test and so nothing we are doing via
debugfs/sysfs to influence this.

Jon

--
nvpublic
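The 250 MHz floor and the 408 MHz result line up with the math in tegra186_emc_icc_set_bw() from the patch: the winning bandwidth vote is halved for DDR and clk_set_rate() then lands on an achievable EMC rate. A standalone sketch with assumed numbers (the ~500 MB/s peak vote is inferred from the printed 250 MHz, not read out of the PCIe driver):

#include <stdio.h>

int main(void)
{
	/* Assumed ICC vote, in bytes/s (inferred from the 250 MHz print
	 * in the trace, not taken from the PCIe driver itself).
	 */
	unsigned long long avg_bw = 0;
	unsigned long long peak_bw = 500000000ULL;
	unsigned long long rate = peak_bw > avg_bw ? peak_bw : avg_bw;

	rate /= 2;	/* DDR: data is sampled on both EMC clock edges */

	printf("requested EMC floor: %llu Hz\n", rate);	/* 250000000 */
	/* clk_set_rate() then lands on an achievable EMC table rate,
	 * which is 408 MHz in the trace above.
	 */
	return 0;
}
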
On Tue, Nov 11, 2025 at 3:29 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>
> On 11/11/2025 17:04, Aaron Kling wrote:
>
> ...
>
> So this is definitely coming from ICC. On boot I see a request for
> 250MHz coming from the PCIe driver ...

...

> The actual rate that is set is 408MHz if I read the rate after
> it is set ...
>
> [ 13.912099] tegra186_emc_icc_set_bw-362: rate 408000000
>
> This is a simple boot test and so nothing we are doing via
> debugfs/sysfs to influence this.

Alright, I think I've got the picture of what's going on now. The
standard arm64 defconfig enables the t194 pcie driver as a module. And
my simple busybox ramdisk that I use for mainline regression testing
isn't loading any modules. If I set the pcie driver to built-in, I
replicate the issue. And I don't see the issue on my normal use case,
because I have the dt changes as well.

So it appears that the pcie driver submits icc bandwidth. And without
cpufreq submitting bandwidth as well, the emc driver gets a very low
number and thus sets a very low emc freq. The question becomes... what
to do about it? If the related dt changes were submitted to
linux-next, everything should fall into place. And I'm not sure where
this falls on the severity scale since it doesn't full out break boot
or prevent operation.

Aaron
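For context, a consumer vote like the one PCIe makes boils down to something of this shape; the path name and the 500 MB/s figure are illustrative only, this is not the pcie-tegra194 code. Every such vote lands in tegra186_emc_icc_set_bw(), which now turns it into an EMC clock floor whenever bwmgr is unavailable:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/interconnect.h>

/* Sketch of an ICC consumer vote with made-up values. */
static int example_request_emc_bandwidth(struct device *dev)
{
	struct icc_path *path;

	path = devm_of_icc_get(dev, "write");
	if (IS_ERR(path))
		return PTR_ERR(path);

	/* avg_bw = 0, peak_bw = 500 MB/s */
	return icc_set_bw(path, 0, MBps_to_icc(500));
}
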
On 12/11/2025 00:17, Aaron Kling wrote:
>>
>> The actual rate that is set is 408MHz if I read the rate after
>> it is set ...
>>
>> [ 13.912099] tegra186_emc_icc_set_bw-362: rate 408000000
>>
>> This is a simple boot test and so nothing we are doing via
>> debugfs/sysfs to influence this.
>
> Alright, I think I've got the picture of what's going on now. The
> standard arm64 defconfig enables the t194 pcie driver as a module. And
> my simple busybox ramdisk that I use for mainline regression testing
> isn't loading any modules. If I set the pcie driver to built-in, I
> replicate the issue. And I don't see the issue on my normal use case,
> because I have the dt changes as well.
>
> So it appears that the pcie driver submits icc bandwidth. And without
> cpufreq submitting bandwidth as well, the emc driver gets a very low
> number and thus sets a very low emc freq. The question becomes... what

If this depends on DT changes then it is obvious ABI break. Nothing in
commit msgs explained ABI impact.

> to do about it? If the related dt changes were submitted to
> linux-next, everything should fall into place. And I'm not sure where
> this falls on the severity scale since it doesn't full out break boot
> or prevent operation.
>
> Aaron

Best regards,
Krzysztof
On 11/11/2025 23:17, Aaron Kling wrote:

...

> Alright, I think I've got the picture of what's going on now. The
> standard arm64 defconfig enables the t194 pcie driver as a module. And
> my simple busybox ramdisk that I use for mainline regression testing
> isn't loading any modules. If I set the pcie driver to built-in, I
> replicate the issue. And I don't see the issue on my normal use case,
> because I have the dt changes as well.
>
> So it appears that the pcie driver submits icc bandwidth. And without
> cpufreq submitting bandwidth as well, the emc driver gets a very low
> number and thus sets a very low emc freq. The question becomes... what
> to do about it? If the related dt changes were submitted to
> linux-next, everything should fall into place. And I'm not sure where
> this falls on the severity scale since it doesn't full out break boot
> or prevent operation.

Where are the related DT changes? If we can get these into -next and
lined up to be merged for v6.19, then that is fine. However, we should
not merge this for v6.19 without the DT changes.

I will also talk with Thierry to see if he has any concerns about users
seeing slow performance if they don't have an up-to-date DTB.

Is there any easy way to detect if the DTB has the necessary properties
to enable ICC scaling?

Jon

--
nvpublic
On 12/11/2025 07:18, Jon Hunter wrote:
>
> On 11/11/2025 23:17, Aaron Kling wrote:
>
> ...
>
> Where are the related DT changes? If we can get these into -next and
> lined up to be merged for v6.19, then that is fine. However, we should

It's still breaking all the users then.

> not merge this for v6.19 without the DT changes.
>
> I will also talk with Thierry to see if he has any concerns about users
> seeing slow performance if they don't have an up-to-date DTB.
>
> Is there any easy way to detect if the DTB has he necessary properties
> to enable ICC scaling?
>
> Jon

Best regards,
Krzysztof
On 12/11/2025 07:26, Krzysztof Kozlowski wrote:
> On 12/11/2025 07:18, Jon Hunter wrote:
>>
>> ...
>>
>> Where are the related DT changes? If we can get these into -next and
>> lined up to be merged for v6.19, then that is fine. However, we should
>
> It's still breaking all the users then.

Yes indeed.

Jon

--
nvpublic
On 12/11/2025 11:59, Jon Hunter wrote:
>
> On 12/11/2025 07:26, Krzysztof Kozlowski wrote:
>> On 12/11/2025 07:18, Jon Hunter wrote:
>>>
>>> ...
>>>
>>> Where are the related DT changes? If we can get these into -next and
>>> lined up to be merged for v6.19, then that is fine. However, we should
>>
>> It's still breaking all the users then.
>
> Yes indeed.

Please test if dropping sync_state from memory controller drivers helps
you. This might be the easiest fix and it is also a known solution when
there are no users.

Best regards,
Krzysztof
On 12/11/2025 11:42, Krzysztof Kozlowski wrote:
> On 12/11/2025 11:59, Jon Hunter wrote:
>>
>> ...
>>
>> Yes indeed.
>
> Please test if dropping sync_state from memory controller drivers helps
> you. This might be the easiest fix and it is also known solution when
> there are no users.

I had a quick look, but I believe that sync_state was first added for
Tegra234 devices. The current issue is with Tegra194, so I am not sure
we can simply drop it.

Jon

--
nvpublic
On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote:
>
> On 11/11/2025 23:17, Aaron Kling wrote:
>
> ...
>
> Where are the related DT changes? If we can get these into -next and
> lined up to be merged for v6.19, then that is fine. However, we should
> not merge this for v6.19 without the DT changes.

The dt changes are here [0]. This was all part of the same series,
keeping everything logically related together. But on v2, Krzysztof
said that none of this should have ever been together and that each
subsystem should get a separate series, even if the changes are
related. Which I did, and now this is split across three series. The
actmon series for tegra210 is in a similar state. Split across four
series and only one has been pulled to linux-next.

> I will also talk with Thierry to see if he has any concerns about users
> seeing slow performance if they don't have an up-to-date DTB.
>
> Is there any easy way to detect if the DTB has he necessary properties
> to enable ICC scaling?

I'm not sure there is any simple way, given how I set up tegra186 and
tegra194. The new dt properties are on the cpu nodes, there's nothing
new for the emc node. So the emc driver just unconditionally declares
itself to icc. It was doing this before too, but wouldn't do anything
on tegra186 or tegra194 because the set_bw function was just a stub and
the real logic happened in the bpmp bw mgr, which only exists on
tegra234+. Now the set_bw function will directly calculate and set the
emc clock as long as the bpmp bw mgr is not supported. Offhand, I can't
think of anything existing to check to skip this, because nothing new
in the dt has been added in the scope of emc.

Aaron

[0] https://lore.kernel.org/r/20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com
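Not a recommendation, but one untested way to approximate "does this DTB opt in to ICC scaling" without adding anything to the emc node would be to look for the CPU-side interconnect votes from the pending DT series and only honour non-bwmgr requests when they exist. Purely a sketch for discussion; the helper name is made up:

#include <linux/of.h>

/* Untested sketch: report whether any CPU node carries an
 * "interconnects" property, i.e. whether the DT has the CPU-side
 * votes from the pending DT series.
 */
static bool tegra186_emc_dt_has_icc_users(void)
{
	struct device_node *np;

	for_each_of_cpu_node(np) {
		if (of_property_present(np, "interconnects")) {
			of_node_put(np);
			return true;
		}
	}

	return false;
}
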
On 12/11/2025 07:21, Aaron Kling wrote: > On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: >> >> >> On 11/11/2025 23:17, Aaron Kling wrote: >> >> ... >> >>> Alright, I think I've got the picture of what's going on now. The >>> standard arm64 defconfig enables the t194 pcie driver as a module. And >>> my simple busybox ramdisk that I use for mainline regression testing >>> isn't loading any modules. If I set the pcie driver to built-in, I >>> replicate the issue. And I don't see the issue on my normal use case, >>> because I have the dt changes as well. >>> >>> So it appears that the pcie driver submits icc bandwidth. And without >>> cpufreq submitting bandwidth as well, the emc driver gets a very low >>> number and thus sets a very low emc freq. The question becomes... what >>> to do about it? If the related dt changes were submitted to >>> linux-next, everything should fall into place. And I'm not sure where >>> this falls on the severity scale since it doesn't full out break boot >>> or prevent operation. >> >> Where are the related DT changes? If we can get these into -next and >> lined up to be merged for v6.19, then that is fine. However, we should >> not merge this for v6.19 without the DT changes. > > The dt changes are here [0]. To confirm, applying the DT changes do not fix this for me. Thierry is having a look at this to see if there is a way to fix this. BTW, I have also noticed that Thierry's memory frequency test [0] is also failing on Tegra186. The test simply tries to set the frequency via the sysfs and this is now failing. I am seeing ... memory: emc: - available rates: (* = current) memory: emc: - 40800000 memory: emc: - 68000000 memory: emc: - 102000000 memory: emc: - 204000000 memory: emc: - 408000000 memory: emc: - 665600000 memory: emc: - 800000000 memory: emc: - 1062400000 memory: emc: - 1331200000 memory: emc: - 1600000000 memory: emc: - 1866000000 * memory: emc: - testing: memory: emc: - 40800000...OSError: [Errno 34] Numerical result out of range Jon [0] https://github.com/thierryreding/tegra-tests -- nvpublic
On 21/11/2025 12:21, Jon Hunter wrote: > > On 12/11/2025 07:21, Aaron Kling wrote: >> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: >>> >>> >>> On 11/11/2025 23:17, Aaron Kling wrote: >>> >>> ... >>> >>>> Alright, I think I've got the picture of what's going on now. The >>>> standard arm64 defconfig enables the t194 pcie driver as a module. And >>>> my simple busybox ramdisk that I use for mainline regression testing >>>> isn't loading any modules. If I set the pcie driver to built-in, I >>>> replicate the issue. And I don't see the issue on my normal use case, >>>> because I have the dt changes as well. >>>> >>>> So it appears that the pcie driver submits icc bandwidth. And without >>>> cpufreq submitting bandwidth as well, the emc driver gets a very low >>>> number and thus sets a very low emc freq. The question becomes... what >>>> to do about it? If the related dt changes were submitted to >>>> linux-next, everything should fall into place. And I'm not sure where >>>> this falls on the severity scale since it doesn't full out break boot >>>> or prevent operation. >>> >>> Where are the related DT changes? If we can get these into -next and >>> lined up to be merged for v6.19, then that is fine. However, we should >>> not merge this for v6.19 without the DT changes. >> >> The dt changes are here [0]. > > To confirm, applying the DT changes do not fix this for me. Thierry is > having a look at this to see if there is a way to fix this. > > BTW, I have also noticed that Thierry's memory frequency test [0] is > also failing on Tegra186. The test simply tries to set the frequency via > the sysfs and this is now failing. I am seeing .. The pull request was not yet merged, so I can amend it. The issue was reported 12 days ago, so if this cannot be fixed in that time, then it is not yet ready and I will drop the changes. Best regards, Krzysztof
On Sat, Nov 22, 2025 at 6:01 AM Krzysztof Kozlowski <krzk@kernel.org> wrote: > > On 21/11/2025 12:21, Jon Hunter wrote: > > > > On 12/11/2025 07:21, Aaron Kling wrote: > >> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >>> > >>> > >>> On 11/11/2025 23:17, Aaron Kling wrote: > >>> > >>> ... > >>> > >>>> Alright, I think I've got the picture of what's going on now. The > >>>> standard arm64 defconfig enables the t194 pcie driver as a module. And > >>>> my simple busybox ramdisk that I use for mainline regression testing > >>>> isn't loading any modules. If I set the pcie driver to built-in, I > >>>> replicate the issue. And I don't see the issue on my normal use case, > >>>> because I have the dt changes as well. > >>>> > >>>> So it appears that the pcie driver submits icc bandwidth. And without > >>>> cpufreq submitting bandwidth as well, the emc driver gets a very low > >>>> number and thus sets a very low emc freq. The question becomes... what > >>>> to do about it? If the related dt changes were submitted to > >>>> linux-next, everything should fall into place. And I'm not sure where > >>>> this falls on the severity scale since it doesn't full out break boot > >>>> or prevent operation. > >>> > >>> Where are the related DT changes? If we can get these into -next and > >>> lined up to be merged for v6.19, then that is fine. However, we should > >>> not merge this for v6.19 without the DT changes. > >> > >> The dt changes are here [0]. > > > > To confirm, applying the DT changes do not fix this for me. Thierry is > > having a look at this to see if there is a way to fix this. > > > > BTW, I have also noticed that Thierry's memory frequency test [0] is > > also failing on Tegra186. The test simply tries to set the frequency via > > the sysfs and this is now failing. I am seeing .. With this patch dropped from -next, what needs to happen to get it requeued? I gave an analysis over two weeks ago and have seen no response since. Aaron
On 09/12/2025 05:26, Aaron Kling wrote: > On Sat, Nov 22, 2025 at 6:01 AM Krzysztof Kozlowski <krzk@kernel.org> wrote: >> >> On 21/11/2025 12:21, Jon Hunter wrote: >>> >>> On 12/11/2025 07:21, Aaron Kling wrote: >>>> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: >>>>> >>>>> >>>>> On 11/11/2025 23:17, Aaron Kling wrote: >>>>> >>>>> ... >>>>> >>>>>> Alright, I think I've got the picture of what's going on now. The >>>>>> standard arm64 defconfig enables the t194 pcie driver as a module. And >>>>>> my simple busybox ramdisk that I use for mainline regression testing >>>>>> isn't loading any modules. If I set the pcie driver to built-in, I >>>>>> replicate the issue. And I don't see the issue on my normal use case, >>>>>> because I have the dt changes as well. >>>>>> >>>>>> So it appears that the pcie driver submits icc bandwidth. And without >>>>>> cpufreq submitting bandwidth as well, the emc driver gets a very low >>>>>> number and thus sets a very low emc freq. The question becomes... what >>>>>> to do about it? If the related dt changes were submitted to >>>>>> linux-next, everything should fall into place. And I'm not sure where >>>>>> this falls on the severity scale since it doesn't full out break boot >>>>>> or prevent operation. >>>>> >>>>> Where are the related DT changes? If we can get these into -next and >>>>> lined up to be merged for v6.19, then that is fine. However, we should >>>>> not merge this for v6.19 without the DT changes. >>>> >>>> The dt changes are here [0]. >>> >>> To confirm, applying the DT changes do not fix this for me. Thierry is >>> having a look at this to see if there is a way to fix this. >>> >>> BTW, I have also noticed that Thierry's memory frequency test [0] is >>> also failing on Tegra186. The test simply tries to set the frequency via >>> the sysfs and this is now failing. I am seeing .. > > With this patch dropped from -next, what needs to happen to get it > requeued? I gave an analysis over two weeks ago and have seen no > response since. Hm, I did not see the root cause identified, so maybe I missed something. Anyway, I am waiting for the patchset to be retested and resent. And testing MUST include kernel development process rules, including how patches are taken - see maintainer soc profile. Any dependencies must be clearly marked. Best regards, Krzysztof
On 09/12/2025 05:53, Krzysztof Kozlowski wrote: > On 09/12/2025 05:26, Aaron Kling wrote: >> On Sat, Nov 22, 2025 at 6:01 AM Krzysztof Kozlowski <krzk@kernel.org> wrote: >>> >>> On 21/11/2025 12:21, Jon Hunter wrote: >>>> >>>> On 12/11/2025 07:21, Aaron Kling wrote: >>>>> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: >>>>>> >>>>>> >>>>>> On 11/11/2025 23:17, Aaron Kling wrote: >>>>>> >>>>>> ... >>>>>> >>>>>>> Alright, I think I've got the picture of what's going on now. The >>>>>>> standard arm64 defconfig enables the t194 pcie driver as a module. And >>>>>>> my simple busybox ramdisk that I use for mainline regression testing >>>>>>> isn't loading any modules. If I set the pcie driver to built-in, I >>>>>>> replicate the issue. And I don't see the issue on my normal use case, >>>>>>> because I have the dt changes as well. >>>>>>> >>>>>>> So it appears that the pcie driver submits icc bandwidth. And without >>>>>>> cpufreq submitting bandwidth as well, the emc driver gets a very low >>>>>>> number and thus sets a very low emc freq. The question becomes... what >>>>>>> to do about it? If the related dt changes were submitted to >>>>>>> linux-next, everything should fall into place. And I'm not sure where >>>>>>> this falls on the severity scale since it doesn't full out break boot >>>>>>> or prevent operation. >>>>>> >>>>>> Where are the related DT changes? If we can get these into -next and >>>>>> lined up to be merged for v6.19, then that is fine. However, we should >>>>>> not merge this for v6.19 without the DT changes. >>>>> >>>>> The dt changes are here [0]. >>>> >>>> To confirm, applying the DT changes do not fix this for me. Thierry is >>>> having a look at this to see if there is a way to fix this. >>>> >>>> BTW, I have also noticed that Thierry's memory frequency test [0] is >>>> also failing on Tegra186. The test simply tries to set the frequency via >>>> the sysfs and this is now failing. I am seeing .. >> >> With this patch dropped from -next, what needs to happen to get it >> requeued? I gave an analysis over two weeks ago and have seen no >> response since. > > Hm, I did not see the root cause identified, so maybe I missed something. > > Anyway, I am waiting for the patchset to be retested and resent. And > testing MUST include kernel development process rules, including how > patches are taken - see maintainer soc profile. Any dependencies must be > clearly marked. Yes me too. I am happy to re-test any updates. Jon -- nvpublic
On Fri, Nov 21, 2025 at 5:21 AM Jon Hunter <jonathanh@nvidia.com> wrote: > > > On 12/11/2025 07:21, Aaron Kling wrote: > > On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >> > >> > >> On 11/11/2025 23:17, Aaron Kling wrote: > >> > >> ... > >> > >>> Alright, I think I've got the picture of what's going on now. The > >>> standard arm64 defconfig enables the t194 pcie driver as a module. And > >>> my simple busybox ramdisk that I use for mainline regression testing > >>> isn't loading any modules. If I set the pcie driver to built-in, I > >>> replicate the issue. And I don't see the issue on my normal use case, > >>> because I have the dt changes as well. > >>> > >>> So it appears that the pcie driver submits icc bandwidth. And without > >>> cpufreq submitting bandwidth as well, the emc driver gets a very low > >>> number and thus sets a very low emc freq. The question becomes... what > >>> to do about it? If the related dt changes were submitted to > >>> linux-next, everything should fall into place. And I'm not sure where > >>> this falls on the severity scale since it doesn't full out break boot > >>> or prevent operation. > >> > >> Where are the related DT changes? If we can get these into -next and > >> lined up to be merged for v6.19, then that is fine. However, we should > >> not merge this for v6.19 without the DT changes. > > > > The dt changes are here [0]. > > To confirm, applying the DT changes do not fix this for me. Thierry is > having a look at this to see if there is a way to fix this. > > BTW, I have also noticed that Thierry's memory frequency test [0] is > also failing on Tegra186. The test simply tries to set the frequency via > the sysfs and this is now failing. I am seeing ... > > memory: emc: - available rates: (* = current) > memory: emc: - 40800000 > memory: emc: - 68000000 > memory: emc: - 102000000 > memory: emc: - 204000000 > memory: emc: - 408000000 > memory: emc: - 665600000 > memory: emc: - 800000000 > memory: emc: - 1062400000 > memory: emc: - 1331200000 > memory: emc: - 1600000000 > memory: emc: - 1866000000 * > memory: emc: - testing: > memory: emc: - 40800000...OSError: [Errno 34] Numerical result out > of range Question. Does this test run and pass on jetson-tk1? I based the tegra210 and tegra186 [0] code on tegra124 [1]. And I don't see a difference in the flow now. What appears to be happening is that icc is reporting a high bandwidth, setting the emc min_freq to something like 1600MHz. Then debugfs is having max_freq set to something low like 40.8MHz. Then the linked code block fails because the higher of the min_freqs is greater than the lower of the max_freqs. But if this same test is run on jetson-tk1, I don't see how it passes. Unless maybe the t124 actmon is consistently setting min freqs during the tests. An argument could be made that any attempt to set debugfs should win a conflict with icc. That could be done. But if that needs done here, I'd argue that it needs replicated across all other applicable emc drivers too. Aaron [0] https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/memory/tegra/tegra186-emc.c?h=next-20251121#n78 [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/memory/tegra/tegra124-emc.c?h=v6.18-rc6#n1066
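For discussion, a hypothetical sketch (not the posted patch and not any existing driver code) of what the "debugfs wins" policy suggested above could look like: when the user lowers the debugfs max rate below the floor currently demanded through the interconnect framework, clamp that floor down instead of failing with -ERANGE. All names below are made up for illustration.

/* Hypothetical illustration of a debugfs request overriding the ICC floor. */
static unsigned long icc_floor_rate;   /* minimum rate currently demanded via ICC */
static unsigned long debug_max_rate;   /* cap requested through debugfs */

static int set_debug_max_rate(unsigned long rate)
{
        debug_max_rate = rate;

        /* Let the explicit user request win over the ICC-derived floor. */
        if (icc_floor_rate > debug_max_rate)
                icc_floor_rate = debug_max_rate;

        /* A real driver would now recompute and program the EMC clock. */
        return 0;
}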
On 21/11/2025 18:17, Aaron Kling wrote: > On Fri, Nov 21, 2025 at 5:21 AM Jon Hunter <jonathanh@nvidia.com> wrote: >> >> >> On 12/11/2025 07:21, Aaron Kling wrote: >>> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: >>>> >>>> >>>> On 11/11/2025 23:17, Aaron Kling wrote: >>>> >>>> ... >>>> >>>>> Alright, I think I've got the picture of what's going on now. The >>>>> standard arm64 defconfig enables the t194 pcie driver as a module. And >>>>> my simple busybox ramdisk that I use for mainline regression testing >>>>> isn't loading any modules. If I set the pcie driver to built-in, I >>>>> replicate the issue. And I don't see the issue on my normal use case, >>>>> because I have the dt changes as well. >>>>> >>>>> So it appears that the pcie driver submits icc bandwidth. And without >>>>> cpufreq submitting bandwidth as well, the emc driver gets a very low >>>>> number and thus sets a very low emc freq. The question becomes... what >>>>> to do about it? If the related dt changes were submitted to >>>>> linux-next, everything should fall into place. And I'm not sure where >>>>> this falls on the severity scale since it doesn't full out break boot >>>>> or prevent operation. >>>> >>>> Where are the related DT changes? If we can get these into -next and >>>> lined up to be merged for v6.19, then that is fine. However, we should >>>> not merge this for v6.19 without the DT changes. >>> >>> The dt changes are here [0]. >> >> To confirm, applying the DT changes do not fix this for me. Thierry is >> having a look at this to see if there is a way to fix this. >> >> BTW, I have also noticed that Thierry's memory frequency test [0] is >> also failing on Tegra186. The test simply tries to set the frequency via >> the sysfs and this is now failing. I am seeing ... >> >> memory: emc: - available rates: (* = current) >> memory: emc: - 40800000 >> memory: emc: - 68000000 >> memory: emc: - 102000000 >> memory: emc: - 204000000 >> memory: emc: - 408000000 >> memory: emc: - 665600000 >> memory: emc: - 800000000 >> memory: emc: - 1062400000 >> memory: emc: - 1331200000 >> memory: emc: - 1600000000 >> memory: emc: - 1866000000 * >> memory: emc: - testing: >> memory: emc: - 40800000...OSError: [Errno 34] Numerical result out >> of range > > Question. Does this test run and pass on jetson-tk1? I based the > tegra210 and tegra186 [0] code on tegra124 [1]. And I don't see a > difference in the flow now. What appears to be happening is that icc > is reporting a high bandwidth, setting the emc min_freq to something > like 1600MHz. Then debugfs is having max_freq set to something low > like 40.8MHz. Then the linked code block fails because the higher of > the min_freqs is greater than the lower of the max_freqs. But if this > same test is run on jetson-tk1, I don't see how it passes. Unless > maybe the t124 actmon is consistently setting min freqs during the > tests. So we don't currently run this test on Tegra124. We could certainly try. I don't recall if there was an issue that prevented us from doing so now. > An argument could be made that any attempt to set debugfs should win a > conflict with icc. That could be done. But if that needs done here, > I'd argue that it needs replicated across all other applicable emc > drivers too. The bottom line is that we cannot regress anything that was working before. Jon -- nvpublic
On Tue, Dec 9, 2025 at 10:08 PM Jon Hunter <jonathanh@nvidia.com> wrote: > > > On 21/11/2025 18:17, Aaron Kling wrote: > > On Fri, Nov 21, 2025 at 5:21 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >> > >> > >> On 12/11/2025 07:21, Aaron Kling wrote: > >>> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >>>> > >>>> > >>>> On 11/11/2025 23:17, Aaron Kling wrote: > >>>> > >>>> ... > >>>> > >>>>> Alright, I think I've got the picture of what's going on now. The > >>>>> standard arm64 defconfig enables the t194 pcie driver as a module. And > >>>>> my simple busybox ramdisk that I use for mainline regression testing > >>>>> isn't loading any modules. If I set the pcie driver to built-in, I > >>>>> replicate the issue. And I don't see the issue on my normal use case, > >>>>> because I have the dt changes as well. > >>>>> > >>>>> So it appears that the pcie driver submits icc bandwidth. And without > >>>>> cpufreq submitting bandwidth as well, the emc driver gets a very low > >>>>> number and thus sets a very low emc freq. The question becomes... what > >>>>> to do about it? If the related dt changes were submitted to > >>>>> linux-next, everything should fall into place. And I'm not sure where > >>>>> this falls on the severity scale since it doesn't full out break boot > >>>>> or prevent operation. > >>>> > >>>> Where are the related DT changes? If we can get these into -next and > >>>> lined up to be merged for v6.19, then that is fine. However, we should > >>>> not merge this for v6.19 without the DT changes. > >>> > >>> The dt changes are here [0]. > >> > >> To confirm, applying the DT changes do not fix this for me. Thierry is > >> having a look at this to see if there is a way to fix this. > >> > >> BTW, I have also noticed that Thierry's memory frequency test [0] is > >> also failing on Tegra186. The test simply tries to set the frequency via > >> the sysfs and this is now failing. I am seeing ... > >> > >> memory: emc: - available rates: (* = current) > >> memory: emc: - 40800000 > >> memory: emc: - 68000000 > >> memory: emc: - 102000000 > >> memory: emc: - 204000000 > >> memory: emc: - 408000000 > >> memory: emc: - 665600000 > >> memory: emc: - 800000000 > >> memory: emc: - 1062400000 > >> memory: emc: - 1331200000 > >> memory: emc: - 1600000000 > >> memory: emc: - 1866000000 * > >> memory: emc: - testing: > >> memory: emc: - 40800000...OSError: [Errno 34] Numerical result out > >> of range > > > > Question. Does this test run and pass on jetson-tk1? I based the > > tegra210 and tegra186 [0] code on tegra124 [1]. And I don't see a > > difference in the flow now. What appears to be happening is that icc > > is reporting a high bandwidth, setting the emc min_freq to something > > like 1600MHz. Then debugfs is having max_freq set to something low > > like 40.8MHz. Then the linked code block fails because the higher of > > the min_freqs is greater than the lower of the max_freqs. But if this > > same test is run on jetson-tk1, I don't see how it passes. Unless > > maybe the t124 actmon is consistently setting min freqs during the > > tests. > > So we don't currently run this test on Tegra124. We could certainly try. > I don't recall if there was an issue that prevented us from doing so now. > > > An argument could be made that any attempt to set debugfs should win a > > conflict with icc. That could be done. But if that needs done here, > > I'd argue that it needs replicated across all other applicable emc > > drivers too. 
> > The bottom line is that we cannot regress anything that was working before. Let me try to iterate the potential issues I've seen stated here. If I'm missing anything, please fill in the blanks. 1) If this change is applied without the related dt change and the pcie driver is loaded, the emc clock can become stuck at the lowest rate. This is caused by the pcie driver providing icc data, but nothing else is. So the very low requested bandwidth results in the emc clock being set very low. I'm not sure there is a 'fix' for this, beyond making sure the dt change is merged to ensure that the cpufreq driver provides bandwidth info, causing the emc driver to select a more reasonable emc clock rate. This is a similar situation to what's currently blocking the tegra210 actmon series. I don't think there is a way for the drivers to know if icc data is missing/wrong. The scaling is doing exactly what it's told based on the icc routing given in the dt. 2) Jon, you report that even with both this change and the related dt change, the issue is still not fixed. But you then posted a log showing that the emc rate is set to max. If the issue is that the emc rate is too low, then how can debugfs report that the rate is max? For reference, everything scales as expected for me given this change plus the dt change on both p2771 and p3636+p3509. 3) If icc is requesting enough bandwidth to set the emc clock to a high value, and a user then tries to set debugfs max_freq to a lower value, this code will reject the change. I do not believe this is an issue unique to this code. tegra20-emc, tegra30-emc, and tegra124-emc all have this same flow. And so does my proposed change to tegra210-emc-core in the actmon series. This is why I asked if tegra124 ran this test, to see if the failure was unique. If this is not a unique failure, then I'd argue that all instances need to be changed, not just this one, which would cause diverging results depending on the soc being used. A lot of the work I'm doing is to try to bring unity and feature parity to all the tegra socs I'm working on. I don't want to cause even more divergence. What actions need to be taken for which issue? Aaron
On 10/12/2025 05:06, Aaron Kling wrote: ... > Let me try to iterate the potential issues I've seen stated here. If > I'm missing anything, please fill in the blanks. > > 1) If this change is applied without the related dt change and the > pcie drvier is loaded, the emc clock can become stuck at the lowest > rate. This is caused by the pcie driver providing icc data, but > nothing else is. So the very low requested bandwidth results in the > emc clock being set very low. I'm not sure there is a 'fix' for this, > beyond making sure the dt change is merged to ensure that the cpufreq > driver provides bandwidth info, causing the emc driver to select a > more reasonable emc clock rate. This is a similar situation to what's > currently blocking the tegra210 actmon series. I don't think there is > a way for the drivers to know if icc data is missing/wrong. The > scaling is doing exactly what it's told based on the icc routing given > in the dt. So this is the fundamental issue with this that must be fixed. We can't allow the PCIe driver to slow the system down. I think that Krzysztof suggested we need some way to determine if the necessary ICC clients are present/registered for ICC to work. Admittedly, I have no idea if there is a simple way to do this, but we need something like that. > 2) Jon, you report that even with both this change and the related dt > change, that the issue is still not fixed. But then posted a log > showing that the emc rate is set to max. If the issue is that emc rate > is too low, then how can debugfs report that the rate is max? For > reference, everything scales as expected for me given this change plus > the dt change on both p2771 and p3636+p3509. To clarify, this broke the boot test on Tegra194 because the boot was too slow. However, this also broke the EMC test on Tegra186 because setting the frequency from the debugfs failed. So two different failures on two different devices. I am guessing the EMC test would also fail on Tegra194, but given that it does not boot, we did not get that far. > 3) If icc is requesting enough bandwidth to set the emc clock to a > high value, then a user tries to set debugfs max_freq to a lower > value, this code will reject the change. I do not believe this is an > issue unique to this code. tegra20-emc, tegra30-emc, and tegra124-emc > all have this same flow. And so does my proposed change to > tegra210-emc-core in the actmon series. This is why I asked if > tegra124 ran this test, to see if the failure was unique. If this is > not a unique failure, then I'd argue that all instances need changed, > not just this one causing diverging results depending on the soc being > utilized. A lot of the work I'm doing is to try to bring unity and > feature parity to all the tegra socs I'm working on. I don't want to > cause even more divergence. Yes that is fair point, however, we need to detect this in the tegra-tests so that we know that this will not work. It would be nice if we could disable ICC from userspace and then run the test. Bottom line here is that #1 is the problem that needs to be fixed. Jon -- nvpublic
On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote: > > > On 10/12/2025 05:06, Aaron Kling wrote: > > ... > > > Let me try to iterate the potential issues I've seen stated here. If > > I'm missing anything, please fill in the blanks. > > > > 1) If this change is applied without the related dt change and the > > pcie drvier is loaded, the emc clock can become stuck at the lowest > > rate. This is caused by the pcie driver providing icc data, but > > nothing else is. So the very low requested bandwidth results in the > > emc clock being set very low. I'm not sure there is a 'fix' for this, > > beyond making sure the dt change is merged to ensure that the cpufreq > > driver provides bandwidth info, causing the emc driver to select a > > more reasonable emc clock rate. This is a similar situation to what's > > currently blocking the tegra210 actmon series. I don't think there is > > a way for the drivers to know if icc data is missing/wrong. The > > scaling is doing exactly what it's told based on the icc routing given > > in the dt. > > So this is the fundamental issue with this that must be fixed. We can't > allow the PCIe driver to slow the system down. I think that Krzysztof > suggested we need some way to determine if the necessary ICC clients are > present/registered for ICC to work. Admittedly, I have no idea if there > is a simple way to do this, but we need something like that. I'm not sure I understand how checking clients would work. Is there a mechanism for the emc driver to know if cpufreq is registered to icc in a way that works with probe deferrals, but also allows for it to be optional? Alternatively if there is not, can we just accept the abi break and have this and the dt change depend on each other? I know it's not desirable or the first choice, but if the other option is to rewrite part of the icc system, then perhaps it should be an option. > > 2) Jon, you report that even with both this change and the related dt > > change, that the issue is still not fixed. But then posted a log > > showing that the emc rate is set to max. If the issue is that emc rate > > is too low, then how can debugfs report that the rate is max? For > > reference, everything scales as expected for me given this change plus > > the dt change on both p2771 and p3636+p3509. > > To clarify, this broke the boot test on Tegra194 because the boot was > too slow. However, this also broke the EMC test on Tegra186 because > setting the frequency from the debugfs failed. So two different failures > on two different devices. I am guessing the EMC test would also fail on > Tegra194, but given that it does not boot, we did not get that far. So you're saying that even with the dt changes, this change on tegra194 still does not boot before the regression test framework times out? If so, I need some more details about this. I have not seen issues on p2972 or p3518. For example, if I boot to android recovery where I set the cpufreq governor to performance, I see emc clock rate set to 2133 MHz and 1600 MHz respectively. And boot time from kernel start to pixels on display is 15 seconds, give or take a couple seconds. This is using the boot stack from l4t r32.7.6. > > 3) If icc is requesting enough bandwidth to set the emc clock to a > > high value, then a user tries to set debugfs max_freq to a lower > > value, this code will reject the change. I do not believe this is an > > issue unique to this code. tegra20-emc, tegra30-emc, and tegra124-emc > > all have this same flow. 
And so does my proposed change to > > tegra210-emc-core in the actmon series. This is why I asked if > > tegra124 ran this test, to see if the failure was unique. If this is > > not a unique failure, then I'd argue that all instances need changed, > > not just this one causing diverging results depending on the soc being > > utilized. A lot of the work I'm doing is to try to bring unity and > > feature parity to all the tegra socs I'm working on. I don't want to > > cause even more divergence. > > Yes that is fair point, however, we need to detect this in the > tegra-tests so that we know that this will not work. It would be nice if > we could disable ICC from userspace and then run the test. I am unaware of a way to disable icc from userspace. That would be useful to me as well. And for the record, I'm not refusing to make such a change. I would just want a series changing all the others to be uploaded and merged concurrently. But I cannot test t20 or t30. Only t124+. > Bottom line here is that #1 is the problem that needs to be fixed. Aaron
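On the question raised above of whether the emc driver can tell that the expected ICC clients exist, one heuristic that might be worth discussing, given that the new dt only adds interconnect paths to the cpu nodes, is to scan /cpus at probe time and only honour ICC-derived rates when at least one cpu node carries an "interconnects" property. This is only a sketch under that assumption; it says nothing about probe ordering or other clients.

#include <linux/of.h>

/* Return true if any cpu node declares an interconnect path (new DT). */
static bool cpus_have_icc_paths(void)
{
        struct device_node *cpus, *cpu;
        bool found = false;

        cpus = of_find_node_by_path("/cpus");
        if (!cpus)
                return false;

        for_each_child_of_node(cpus, cpu) {
                if (of_find_property(cpu, "interconnects", NULL)) {
                        found = true;
                        of_node_put(cpu);
                        break;
                }
        }

        of_node_put(cpus);
        return found;
}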
On 10/12/2025 18:32, Aaron Kling wrote: > On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote: >> >> >> On 10/12/2025 05:06, Aaron Kling wrote: >> >> ... >> >>> Let me try to iterate the potential issues I've seen stated here. If >>> I'm missing anything, please fill in the blanks. >>> >>> 1) If this change is applied without the related dt change and the >>> pcie drvier is loaded, the emc clock can become stuck at the lowest >>> rate. This is caused by the pcie driver providing icc data, but >>> nothing else is. So the very low requested bandwidth results in the >>> emc clock being set very low. I'm not sure there is a 'fix' for this, >>> beyond making sure the dt change is merged to ensure that the cpufreq >>> driver provides bandwidth info, causing the emc driver to select a >>> more reasonable emc clock rate. This is a similar situation to what's >>> currently blocking the tegra210 actmon series. I don't think there is >>> a way for the drivers to know if icc data is missing/wrong. The >>> scaling is doing exactly what it's told based on the icc routing given >>> in the dt. >> >> So this is the fundamental issue with this that must be fixed. We can't >> allow the PCIe driver to slow the system down. I think that Krzysztof >> suggested we need some way to determine if the necessary ICC clients are >> present/registered for ICC to work. Admittedly, I have no idea if there >> is a simple way to do this, but we need something like that. > > I'm not sure I understand how checking clients would work. Is there a > mechanism for the emc driver to know if cpufreq is registered to icc > in a way that works with probe deferrals, but also allows for it to be > optional? I am not sure if such a mechanism exists either, but it seems that we need something like this. > Alternatively if there is not, can we just accept the abi break and > have this and the dt change depend on each other? I know it's not > desirable or the first choice, but if the other option is to rewrite > part of the icc system, then perhaps it should be an option. I am not sure it is an ABI break, but the default performance might be worse. I am not sure if you are proposing a way to enforce the dependency or just saying that there is a dependency. We can't do the latter, but if there is a way for the kernel to check the dependency and make the right choice, then that should work. >>> 2) Jon, you report that even with both this change and the related dt >>> change, that the issue is still not fixed. But then posted a log >>> showing that the emc rate is set to max. If the issue is that emc rate >>> is too low, then how can debugfs report that the rate is max? For >>> reference, everything scales as expected for me given this change plus >>> the dt change on both p2771 and p3636+p3509. >> >> To clarify, this broke the boot test on Tegra194 because the boot was >> too slow. However, this also broke the EMC test on Tegra186 because >> setting the frequency from the debugfs failed. So two different failures >> on two different devices. I am guessing the EMC test would also fail on >> Tegra194, but given that it does not boot, we did not get that far. > > So you're saying that even with the dt changes, this change on > tegra194 still does not boot before the regression test framework > times out? If so, I need some more details about this. I have not seen > issues on p2972 or p3518. 
For example, if I boot to android recovery > where I set the cpufreq governor to performance, I see emc clock rate > set to 2133 MHz and 1600 MHz respectively. And boot time from kernel > start to pixels on display is 15 seconds, give or take a couple > seconds. This is using the boot stack from l4t r32.7.6. Yes. The boot failure here is not a hard boot failure, but the device takes too long to boot and the boot test times out. And no we will not increase the timeout as it is there for a reason. It could well be because the default governor is not set to performance. If you boot with just using the stock 'defconfig' for ARM64 without setting the governor does it take longer? Jon -- nvpublic
On Wed, Dec 10, 2025 at 3:24 PM Jon Hunter <jonathanh@nvidia.com> wrote: > > > On 10/12/2025 18:32, Aaron Kling wrote: > > On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >> > >> > >> On 10/12/2025 05:06, Aaron Kling wrote: > >> > >> ... > >> > >>> Let me try to iterate the potential issues I've seen stated here. If > >>> I'm missing anything, please fill in the blanks. > >>> > >>> 1) If this change is applied without the related dt change and the > >>> pcie drvier is loaded, the emc clock can become stuck at the lowest > >>> rate. This is caused by the pcie driver providing icc data, but > >>> nothing else is. So the very low requested bandwidth results in the > >>> emc clock being set very low. I'm not sure there is a 'fix' for this, > >>> beyond making sure the dt change is merged to ensure that the cpufreq > >>> driver provides bandwidth info, causing the emc driver to select a > >>> more reasonable emc clock rate. This is a similar situation to what's > >>> currently blocking the tegra210 actmon series. I don't think there is > >>> a way for the drivers to know if icc data is missing/wrong. The > >>> scaling is doing exactly what it's told based on the icc routing given > >>> in the dt. > >> > >> So this is the fundamental issue with this that must be fixed. We can't > >> allow the PCIe driver to slow the system down. I think that Krzysztof > >> suggested we need some way to determine if the necessary ICC clients are > >> present/registered for ICC to work. Admittedly, I have no idea if there > >> is a simple way to do this, but we need something like that. > > > > I'm not sure I understand how checking clients would work. Is there a > > mechanism for the emc driver to know if cpufreq is registered to icc > > in a way that works with probe deferrals, but also allows for it to be > > optional? > > I am not sure if such a mechanism exists either, but it seems that we > need something like this. > > > Alternatively if there is not, can we just accept the abi break and > > have this and the dt change depend on each other? I know it's not > > desirable or the first choice, but if the other option is to rewrite > > part of the icc system, then perhaps it should be an option. > > I am not sure it is an ABI break, but the default performance might be > worse. I am not sure if you are proposing a way to enforce the > dependency or just saying that there is a dependency. We can't do the > latter, but if there is a way for the kernel to check the dependency and > make the right choice, then that should work. So we can't accept that older dt's will run slower on a newer kernel and say that a newer dt is needed for full performance? If that's not an option, then I have no idea how to resolve this. I'm not greatly knowledgeable about the icc subsystem. I can try to look into options, but I'm not greatly optimistic about me finding one. If someone could suggest a concept on how to make it work, I could implement it. But I'm not even seeing the concept right now. > >>> 2) Jon, you report that even with both this change and the related dt > >>> change, that the issue is still not fixed. But then posted a log > >>> showing that the emc rate is set to max. If the issue is that emc rate > >>> is too low, then how can debugfs report that the rate is max? For > >>> reference, everything scales as expected for me given this change plus > >>> the dt change on both p2771 and p3636+p3509. > >> > >> To clarify, this broke the boot test on Tegra194 because the boot was > >> too slow. 
However, this also broke the EMC test on Tegra186 because > >> setting the frequency from the debugfs failed. So two different failures > >> on two different devices. I am guessing the EMC test would also fail on > >> Tegra194, but given that it does not boot, we did not get that far. > > > > So you're saying that even with the dt changes, this change on > > tegra194 still does not boot before the regression test framework > > times out? If so, I need some more details about this. I have not seen > > issues on p2972 or p3518. For example, if I boot to android recovery > > where I set the cpufreq governor to performance, I see emc clock rate > > set to 2133 MHz and 1600 MHz respectively. And boot time from kernel > > start to pixels on display is 15 seconds, give or take a couple > > seconds. This is using the boot stack from l4t r32.7.6. > > Yes. The boot failure here is not a hard boot failure, but the device > takes too long to boot and the boot test times out. And no we will not > increase the timeout as it is there for a reason. It could well be > because the default governor is not set to performance. If you boot with > just using the stock 'defconfig' for ARM64 without setting the governor > does it take longer? So, I checked out next-20251210, then b4 shazam'ed this series and the matching dt series, 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. Then built with LLVM=1 ARCH=arm64 make defconfig LLVM=1 ARCH=arm64 make -j33 Image nvidia/tegra194-p2972-0000.dtb I packaged them into an android boot image using a lightly modified copy of Gnurou's bbinitramfs which just drops to a busybox shell. Note that this includes no modules, and since the pcie driver is =m in defconfig, it is not included. Then I flashed that with the l4t r32.7.6 boot stack to p2972. I got the shell on uart after 4.275 seconds in the kernel. Per sysfs, the cpufreq governor is schedutil and all policies are idling at min freq, 115200. And per debugfs, the emc clock is 800000000. All this looks to be as expected. I have no idea why the regression test setup is timing out. I have not seen the issue through any of my testing. On pure mainline as per the above paragraph, or with the patches on the android common kernel, as per my target use case. I don't know what to do if I can't replicate the issue. I don't suppose the flash package for the regression test setup is something that could be released? Aaron
On 10/12/2025 22:41, Aaron Kling wrote: > On Wed, Dec 10, 2025 at 3:24 PM Jon Hunter <jonathanh@nvidia.com> wrote: >> >> >> On 10/12/2025 18:32, Aaron Kling wrote: >>> On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote: >>>> >>>> >>>> On 10/12/2025 05:06, Aaron Kling wrote: >>>> >>>> ... >>>> >>>>> Let me try to iterate the potential issues I've seen stated here. If >>>>> I'm missing anything, please fill in the blanks. >>>>> >>>>> 1) If this change is applied without the related dt change and the >>>>> pcie drvier is loaded, the emc clock can become stuck at the lowest >>>>> rate. This is caused by the pcie driver providing icc data, but >>>>> nothing else is. So the very low requested bandwidth results in the >>>>> emc clock being set very low. I'm not sure there is a 'fix' for this, >>>>> beyond making sure the dt change is merged to ensure that the cpufreq >>>>> driver provides bandwidth info, causing the emc driver to select a >>>>> more reasonable emc clock rate. This is a similar situation to what's >>>>> currently blocking the tegra210 actmon series. I don't think there is >>>>> a way for the drivers to know if icc data is missing/wrong. The >>>>> scaling is doing exactly what it's told based on the icc routing given >>>>> in the dt. >>>> >>>> So this is the fundamental issue with this that must be fixed. We can't >>>> allow the PCIe driver to slow the system down. I think that Krzysztof >>>> suggested we need some way to determine if the necessary ICC clients are >>>> present/registered for ICC to work. Admittedly, I have no idea if there >>>> is a simple way to do this, but we need something like that. >>> >>> I'm not sure I understand how checking clients would work. Is there a >>> mechanism for the emc driver to know if cpufreq is registered to icc >>> in a way that works with probe deferrals, but also allows for it to be >>> optional? >> >> I am not sure if such a mechanism exists either, but it seems that we >> need something like this. >> >>> Alternatively if there is not, can we just accept the abi break and >>> have this and the dt change depend on each other? I know it's not >>> desirable or the first choice, but if the other option is to rewrite >>> part of the icc system, then perhaps it should be an option. >> >> I am not sure it is an ABI break, but the default performance might be >> worse. I am not sure if you are proposing a way to enforce the >> dependency or just saying that there is a dependency. We can't do the >> latter, but if there is a way for the kernel to check the dependency and >> make the right choice, then that should work. > > So we can't accept that older dt's will run slower on a newer kernel > and say that a newer dt is needed for full performance? > > If that's not an option, then I have no idea how to resolve this. I'm > not greatly knowledgeable about the icc subsystem. I can try to look > into options, but I'm not greatly optimistic about me finding one. If > someone could suggest a concept on how to make it work, I could > implement it. But I'm not even seeing the concept right now. > >>>>> 2) Jon, you report that even with both this change and the related dt >>>>> change, that the issue is still not fixed. But then posted a log >>>>> showing that the emc rate is set to max. If the issue is that emc rate >>>>> is too low, then how can debugfs report that the rate is max? For >>>>> reference, everything scales as expected for me given this change plus >>>>> the dt change on both p2771 and p3636+p3509. 
>>>> >>>> To clarify, this broke the boot test on Tegra194 because the boot was >>>> too slow. However, this also broke the EMC test on Tegra186 because >>>> setting the frequency from the debugfs failed. So two different failures >>>> on two different devices. I am guessing the EMC test would also fail on >>>> Tegra194, but given that it does not boot, we did not get that far. >>> >>> So you're saying that even with the dt changes, this change on >>> tegra194 still does not boot before the regression test framework >>> times out? If so, I need some more details about this. I have not seen >>> issues on p2972 or p3518. For example, if I boot to android recovery >>> where I set the cpufreq governor to performance, I see emc clock rate >>> set to 2133 MHz and 1600 MHz respectively. And boot time from kernel >>> start to pixels on display is 15 seconds, give or take a couple >>> seconds. This is using the boot stack from l4t r32.7.6. >> >> Yes. The boot failure here is not a hard boot failure, but the device >> takes too long to boot and the boot test times out. And no we will not >> increase the timeout as it is there for a reason. It could well be >> because the default governor is not set to performance. If you boot with >> just using the stock 'defconfig' for ARM64 without setting the governor >> does it take longer? > > So, I checked out next-20251210, then b4 shazam'ed this series and the > matching dt series, > 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. Then built with > LLVM=1 ARCH=arm64 make defconfig > LLVM=1 ARCH=arm64 make -j33 Image nvidia/tegra194-p2972-0000.dtb > > I packaged them into an android boot image using a lightly modified > copy of Gnurou's bbinitramfs which just drops to a busybox shell. Note > that this includes no modules, and since the pcie driver is =m in > defconfig, it is not included. Then I flashed that with the l4t > r32.7.6 boot stack to p2972. I got the shell on uart after 4.275 > seconds in the kernel. Per sysfs, the cpufreq governor is schedutil > and all policies are idling at min freq, 115200. And per debugfs, the > emc clock is 800000000. All this looks to be as expected. > > I have no idea why the regression test setup is timing out. I have not > seen the issue through any of my testing. On pure mainline as per the > above paragraph, or with the patches on the android common kernel, as > per my target use case. I don't know what to do if I can't replicate > the issue. I don't suppose the flash package for the regression test > setup is something that could be released? I thought we already concluded that you did not see this because you did not have the PCIe module present in your testing? From the above its sounds like you still don't have that driver present and so you don't see the issue. I guess I am not surprised by that but I am not sure why you are now saying you have no idea why this is timing out? I thought this was understood. -- nvpublic
On Thu, Dec 11, 2025 at 1:47 AM Jon Hunter <jonathanh@nvidia.com> wrote: > > > On 10/12/2025 22:41, Aaron Kling wrote: > > On Wed, Dec 10, 2025 at 3:24 PM Jon Hunter <jonathanh@nvidia.com> wrote: > >> > >> > >> On 10/12/2025 18:32, Aaron Kling wrote: > >>> On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >>>> > >>>> > >>>> On 10/12/2025 05:06, Aaron Kling wrote: > >>>> > >>>> ... > >>>> > >>>>> Let me try to iterate the potential issues I've seen stated here. If > >>>>> I'm missing anything, please fill in the blanks. > >>>>> > >>>>> 1) If this change is applied without the related dt change and the > >>>>> pcie drvier is loaded, the emc clock can become stuck at the lowest > >>>>> rate. This is caused by the pcie driver providing icc data, but > >>>>> nothing else is. So the very low requested bandwidth results in the > >>>>> emc clock being set very low. I'm not sure there is a 'fix' for this, > >>>>> beyond making sure the dt change is merged to ensure that the cpufreq > >>>>> driver provides bandwidth info, causing the emc driver to select a > >>>>> more reasonable emc clock rate. This is a similar situation to what's > >>>>> currently blocking the tegra210 actmon series. I don't think there is > >>>>> a way for the drivers to know if icc data is missing/wrong. The > >>>>> scaling is doing exactly what it's told based on the icc routing given > >>>>> in the dt. > >>>> > >>>> So this is the fundamental issue with this that must be fixed. We can't > >>>> allow the PCIe driver to slow the system down. I think that Krzysztof > >>>> suggested we need some way to determine if the necessary ICC clients are > >>>> present/registered for ICC to work. Admittedly, I have no idea if there > >>>> is a simple way to do this, but we need something like that. > >>> > >>> I'm not sure I understand how checking clients would work. Is there a > >>> mechanism for the emc driver to know if cpufreq is registered to icc > >>> in a way that works with probe deferrals, but also allows for it to be > >>> optional? > >> > >> I am not sure if such a mechanism exists either, but it seems that we > >> need something like this. > >> > >>> Alternatively if there is not, can we just accept the abi break and > >>> have this and the dt change depend on each other? I know it's not > >>> desirable or the first choice, but if the other option is to rewrite > >>> part of the icc system, then perhaps it should be an option. > >> > >> I am not sure it is an ABI break, but the default performance might be > >> worse. I am not sure if you are proposing a way to enforce the > >> dependency or just saying that there is a dependency. We can't do the > >> latter, but if there is a way for the kernel to check the dependency and > >> make the right choice, then that should work. > > > > So we can't accept that older dt's will run slower on a newer kernel > > and say that a newer dt is needed for full performance? > > > > If that's not an option, then I have no idea how to resolve this. I'm > > not greatly knowledgeable about the icc subsystem. I can try to look > > into options, but I'm not greatly optimistic about me finding one. If > > someone could suggest a concept on how to make it work, I could > > implement it. But I'm not even seeing the concept right now. > > > >>>>> 2) Jon, you report that even with both this change and the related dt > >>>>> change, that the issue is still not fixed. But then posted a log > >>>>> showing that the emc rate is set to max. 
If the issue is that emc rate > >>>>> is too low, then how can debugfs report that the rate is max? For > >>>>> reference, everything scales as expected for me given this change plus > >>>>> the dt change on both p2771 and p3636+p3509. > >>>> > >>>> To clarify, this broke the boot test on Tegra194 because the boot was > >>>> too slow. However, this also broke the EMC test on Tegra186 because > >>>> setting the frequency from the debugfs failed. So two different failures > >>>> on two different devices. I am guessing the EMC test would also fail on > >>>> Tegra194, but given that it does not boot, we did not get that far. > >>> > >>> So you're saying that even with the dt changes, this change on > >>> tegra194 still does not boot before the regression test framework > >>> times out? If so, I need some more details about this. I have not seen > >>> issues on p2972 or p3518. For example, if I boot to android recovery > >>> where I set the cpufreq governor to performance, I see emc clock rate > >>> set to 2133 MHz and 1600 MHz respectively. And boot time from kernel > >>> start to pixels on display is 15 seconds, give or take a couple > >>> seconds. This is using the boot stack from l4t r32.7.6. > >> > >> Yes. The boot failure here is not a hard boot failure, but the device > >> takes too long to boot and the boot test times out. And no we will not > >> increase the timeout as it is there for a reason. It could well be > >> because the default governor is not set to performance. If you boot with > >> just using the stock 'defconfig' for ARM64 without setting the governor > >> does it take longer? > > > > So, I checked out next-20251210, then b4 shazam'ed this series and the > > matching dt series, > > 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. Then built with > > LLVM=1 ARCH=arm64 make defconfig > > LLVM=1 ARCH=arm64 make -j33 Image nvidia/tegra194-p2972-0000.dtb > > > > I packaged them into an android boot image using a lightly modified > > copy of Gnurou's bbinitramfs which just drops to a busybox shell. Note > > that this includes no modules, and since the pcie driver is =m in > > defconfig, it is not included. Then I flashed that with the l4t > > r32.7.6 boot stack to p2972. I got the shell on uart after 4.275 > > seconds in the kernel. Per sysfs, the cpufreq governor is schedutil > > and all policies are idling at min freq, 115200. And per debugfs, the > > emc clock is 800000000. All this looks to be as expected. > > > > I have no idea why the regression test setup is timing out. I have not > > seen the issue through any of my testing. On pure mainline as per the > > above paragraph, or with the patches on the android common kernel, as > > per my target use case. I don't know what to do if I can't replicate > > the issue. I don't suppose the flash package for the regression test > > setup is something that could be released? > > I thought we already concluded that you did not see this because you did > not have the PCIe module present in your testing? From the above its > sounds like you still don't have that driver present and so you don't > see the issue. I guess I am not surprised by that but I am not sure why > you are now saying you have no idea why this is timing out? I thought > this was understood. Oh, come on... The issue is a combination of old dt AND the pcie driver. I can reproduce low emc clock with that. But then you said t194 on the regression bench was still timing out even with the new dt. And that's what I cannot reproduce. 
And then you asked me to test with pure mainline and a stock/unmodified defconfig. So I did, using -next and the two open series, but clarified what an unmodified defconfig meant. So, I modified the .config to enable the pcie driver as built-in, then reflashed. Otherwise the same as my previous post. I got the shell after 11 seconds. And clocks are still as reported before, cpu at min, emc at 800000000. Aaron
On Thu, Dec 11, 2025 at 11:39 AM Aaron Kling <webgeek1234@gmail.com> wrote:
>
> On Thu, Dec 11, 2025 at 1:47 AM Jon Hunter <jonathanh@nvidia.com> wrote:
> >
> >
> > On 10/12/2025 22:41, Aaron Kling wrote:
> > > On Wed, Dec 10, 2025 at 3:24 PM Jon Hunter <jonathanh@nvidia.com> wrote:
> > >>
> > >>
> > >> On 10/12/2025 18:32, Aaron Kling wrote:
> > >>> On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote:
> > >>>>
> > >>>>
> > >>>> On 10/12/2025 05:06, Aaron Kling wrote:
> > >>>>
> > >>>> ...
> > >>>>
> > >>>>> Let me try to iterate the potential issues I've seen stated here. If
> > >>>>> I'm missing anything, please fill in the blanks.
> > >>>>>
> > >>>>> 1) If this change is applied without the related dt change and the
> > >>>>> pcie drvier is loaded, the emc clock can become stuck at the lowest
> > >>>>> rate. This is caused by the pcie driver providing icc data, but
> > >>>>> nothing else is. So the very low requested bandwidth results in the
> > >>>>> emc clock being set very low. I'm not sure there is a 'fix' for this,
> > >>>>> beyond making sure the dt change is merged to ensure that the cpufreq
> > >>>>> driver provides bandwidth info, causing the emc driver to select a
> > >>>>> more reasonable emc clock rate. This is a similar situation to what's
> > >>>>> currently blocking the tegra210 actmon series. I don't think there is
> > >>>>> a way for the drivers to know if icc data is missing/wrong. The
> > >>>>> scaling is doing exactly what it's told based on the icc routing given
> > >>>>> in the dt.
> > >>>>
> > >>>> So this is the fundamental issue with this that must be fixed. We can't
> > >>>> allow the PCIe driver to slow the system down. I think that Krzysztof
> > >>>> suggested we need some way to determine if the necessary ICC clients are
> > >>>> present/registered for ICC to work. Admittedly, I have no idea if there
> > >>>> is a simple way to do this, but we need something like that.
> > >>>
> > >>> I'm not sure I understand how checking clients would work. Is there a
> > >>> mechanism for the emc driver to know if cpufreq is registered to icc
> > >>> in a way that works with probe deferrals, but also allows for it to be
> > >>> optional?
> > >>
> > >> I am not sure if such a mechanism exists either, but it seems that we
> > >> need something like this.
> > >>
> > >>> Alternatively if there is not, can we just accept the abi break and
> > >>> have this and the dt change depend on each other? I know it's not
> > >>> desirable or the first choice, but if the other option is to rewrite
> > >>> part of the icc system, then perhaps it should be an option.
> > >>
> > >> I am not sure it is an ABI break, but the default performance might be
> > >> worse. I am not sure if you are proposing a way to enforce the
> > >> dependency or just saying that there is a dependency. We can't do the
> > >> latter, but if there is a way for the kernel to check the dependency and
> > >> make the right choice, then that should work.
> > >
> > > So we can't accept that older dt's will run slower on a newer kernel
> > > and say that a newer dt is needed for full performance?
> > >
> > > If that's not an option, then I have no idea how to resolve this. I'm
> > > not greatly knowledgeable about the icc subsystem. I can try to look
> > > into options, but I'm not greatly optimistic about me finding one. If
> > > someone could suggest a concept on how to make it work, I could
> > > implement it. But I'm not even seeing the concept right now.
> > >
> > >>>>> 2) Jon, you report that even with both this change and the related dt
> > >>>>> change, that the issue is still not fixed. But then posted a log
> > >>>>> showing that the emc rate is set to max. If the issue is that emc rate
> > >>>>> is too low, then how can debugfs report that the rate is max? For
> > >>>>> reference, everything scales as expected for me given this change plus
> > >>>>> the dt change on both p2771 and p3636+p3509.
> > >>>>
> > >>>> To clarify, this broke the boot test on Tegra194 because the boot was
> > >>>> too slow. However, this also broke the EMC test on Tegra186 because
> > >>>> setting the frequency from the debugfs failed. So two different failures
> > >>>> on two different devices. I am guessing the EMC test would also fail on
> > >>>> Tegra194, but given that it does not boot, we did not get that far.
> > >>>
> > >>> So you're saying that even with the dt changes, this change on
> > >>> tegra194 still does not boot before the regression test framework
> > >>> times out? If so, I need some more details about this. I have not seen
> > >>> issues on p2972 or p3518. For example, if I boot to android recovery
> > >>> where I set the cpufreq governor to performance, I see emc clock rate
> > >>> set to 2133 MHz and 1600 MHz respectively. And boot time from kernel
> > >>> start to pixels on display is 15 seconds, give or take a couple
> > >>> seconds. This is using the boot stack from l4t r32.7.6.
> > >>
> > >> Yes. The boot failure here is not a hard boot failure, but the device
> > >> takes too long to boot and the boot test times out. And no we will not
> > >> increase the timeout as it is there for a reason. It could well be
> > >> because the default governor is not set to performance. If you boot with
> > >> just using the stock 'defconfig' for ARM64 without setting the governor
> > >> does it take longer?
> > >
> > > So, I checked out next-20251210, then b4 shazam'ed this series and the
> > > matching dt series,
> > > 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. Then built with
> > > LLVM=1 ARCH=arm64 make defconfig
> > > LLVM=1 ARCH=arm64 make -j33 Image nvidia/tegra194-p2972-0000.dtb
> > >
> > > I packaged them into an android boot image using a lightly modified
> > > copy of Gnurou's bbinitramfs which just drops to a busybox shell. Note
> > > that this includes no modules, and since the pcie driver is =m in
> > > defconfig, it is not included. Then I flashed that with the l4t
> > > r32.7.6 boot stack to p2972. I got the shell on uart after 4.275
> > > seconds in the kernel. Per sysfs, the cpufreq governor is schedutil
> > > and all policies are idling at min freq, 115200. And per debugfs, the
> > > emc clock is 800000000. All this looks to be as expected.
> > >
> > > I have no idea why the regression test setup is timing out. I have not
> > > seen the issue through any of my testing. On pure mainline as per the
> > > above paragraph, or with the patches on the android common kernel, as
> > > per my target use case. I don't know what to do if I can't replicate
> > > the issue. I don't suppose the flash package for the regression test
> > > setup is something that could be released?
> >
> > I thought we already concluded that you did not see this because you did
> > not have the PCIe module present in your testing? From the above its
> > sounds like you still don't have that driver present and so you don't
> > see the issue. I guess I am not surprised by that but I am not sure why
> > you are now saying you have no idea why this is timing out? I thought
> > this was understood.
>
> Oh, come on... The issue is a combination of old dt AND the pcie
> driver. I can reproduce low emc clock with that. But then you said
> t194 on the regression bench was still timing out even with the new
> dt. And that's what I cannot reproduce. And then you asked me to test
> with pure mainline and a stock/unmodified defconfig. So I did, using
> -next and the two open series, but clarified what an unmodified
> defconfig meant.
>
> So, I modified the .config to enable the pcie driver as built-in, then
> reflashed. Otherwise the same as my previous post. I got the shell
> after 11 seconds. And clocks are still as reported before, cpu at min,
> emc at 800000000.

To try to move a resolution along, let me try to enumerate the issues
again. Again, please clarify should I have something incorrect or
incomplete.

1) The primary issue is when an old dtb is used with this commit and
the pcie driver is loaded. I can reproduce this issue on t186 and
t194. If this becomes the sole remaining blocking issue, I would like
for an exception to the normal rule be considered and this merged
anyways. Since it does not cause a boot failure and distros package a
new dt normally anyways. And to my knowledge, working around this
would involve redoing part off the icc subsystem itself, a major task
in comparison.

2) T194 is reported to have low clocks even with a new dt on the
Nvidia regression bench. I cannot reproduce this, even with the pcie
driver loaded. Can this be re-verified, please? And if it still
happens, can logs from the failure be made available and/or more
information provided as to the state of the unit? Like changes to the
default defconfig, modules that get loaded, etc.

3) Setting the max clock via debugfs fails when icc has pushed the
current clock higher than the requested rate. This is a logic issue
with all tegra emc drivers that implement dfs via icc. The suggested
resolutions are to leave this as is to keep consistency with the
existing drivers, perhaps updating all later, or to update the
existing implementations in a separate series, then send a new
revision here to match. I am personally unable to verify anything
older than tegra124, however.

Aaron
On 17/12/2025 18:39, Aaron Kling wrote:

...

> To try to move a resolution along, let me try to enumerate the issues
> again. Again, please clarify should I have something incorrect or
> incomplete.
>
> 1) The primary issue is when an old dtb is used with this commit and
> the pcie driver is loaded. I can reproduce this issue on t186 and
> t194. If this becomes the sole remaining blocking issue, I would like
> for an exception to the normal rule be considered and this merged
> anyways. Since it does not cause a boot failure and distros package a
> new dt normally anyways. And to my knowledge, working around this
> would involve redoing part off the icc subsystem itself, a major task
> in comparison.
>
> 2) T194 is reported to have low clocks even with a new dt on the
> Nvidia regression bench. I cannot reproduce this, even with the pcie
> driver loaded. Can this be re-verified, please? And if it still
> happens, can logs from the failure be made available and/or more
> information provided as to the state of the unit? Like changes to the
> default defconfig, modules that get loaded, etc.

Can you list all the patches that need to be applied on top of the
current -next and I will run it through our testing to make sure I have
this correct.

> 3) Setting the max clock via debugfs fails when icc has pushed the
> current clock higher than the requested rate. This is a logic issue
> with all tegra emc drivers that implement dfs via icc. The suggested
> resolutions are to leave this as is to keep consistency with the
> existing drivers, perhaps updating all later, or to update the
> existing implementations in a separate series, then send a new
> revision here to match. I am personally unable to verify anything
> older than tegra124, however.

Thierry and I chatted about this last week and we feel that debugfs
should be able to override the current configuration. So this will need
to be addressed as well.

Jon

--
nvpublic
On Wed, Dec 17, 2025 at 12:59 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>
>
> On 17/12/2025 18:39, Aaron Kling wrote:
>
> ...
>
> > To try to move a resolution along, let me try to enumerate the issues
> > again. Again, please clarify should I have something incorrect or
> > incomplete.
> >
> > 1) The primary issue is when an old dtb is used with this commit and
> > the pcie driver is loaded. I can reproduce this issue on t186 and
> > t194. If this becomes the sole remaining blocking issue, I would like
> > for an exception to the normal rule be considered and this merged
> > anyways. Since it does not cause a boot failure and distros package a
> > new dt normally anyways. And to my knowledge, working around this
> > would involve redoing part off the icc subsystem itself, a major task
> > in comparison.
> >
> > 2) T194 is reported to have low clocks even with a new dt on the
> > Nvidia regression bench. I cannot reproduce this, even with the pcie
> > driver loaded. Can this be re-verified, please? And if it still
> > happens, can logs from the failure be made available and/or more
> > information provided as to the state of the unit? Like changes to the
> > default defconfig, modules that get loaded, etc.
>
> Can you list all the patches that need to be applied on top of the
> current -next and I will run it through our testing to make sure I have
> this correct.

This series, message id:
20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com. And the dt
series, message id:
20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. So, my build
sequence is:

git checkout next-20251217
b4 shazam 20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com
b4 shazam 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com
LLVM=1 ARCH=arm64 make defconfig
*edit .config to set CONFIG_PCIE_TEGRA194, CONFIG_PCIE_TEGRA194_HOST,
and CONFIG_PCIE_TEGRA194_EP to =y*
LLVM=1 ARCH=arm64 make olddefconfig
LLVM=1 ARCH=arm64 make -j33 Image nvidia/tegra194-p2972-0000.dtb

I then flash those with no modules, packaged with the simple ramdisk,
and I get a shell at 11.2 seconds and emc rate is 800 MHz at idle.

> > 3) Setting the max clock via debugfs fails when icc has pushed the
> > current clock higher than the requested rate. This is a logic issue
> > with all tegra emc drivers that implement dfs via icc. The suggested
> > resolutions are to leave this as is to keep consistency with the
> > existing drivers, perhaps updating all later, or to update the
> > existing implementations in a separate series, then send a new
> > revision here to match. I am personally unable to verify anything
> > older than tegra124, however.
>
> Thierry and I chatted about this last week and we feel that debugfs
> should be able to override the current configuration. So this will need
> to be addressed as well.

Alright. I will start looking at getting that logic straight, then
upload a new series for the older archs and a new revision of this.

Aaron
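[As an illustration of the debugfs override being discussed here: a minimal
sketch only, not code from this series. It reuses the rate-request
bookkeeping from the patch at the top of the thread; emc_debug_override()
is a hypothetical helper and the clamping policy shown is just one possible
choice for letting the debugfs window win over the ICC-derived floor.]

/* Caller is expected to hold emc->rate_lock, as in the series. */
static int emc_debug_override(struct tegra186_emc *emc,
			      unsigned long dbg_min, unsigned long dbg_max)
{
	unsigned long icc_min = emc->requested_rate[EMC_RATE_ICC].min_rate;
	unsigned long rate;
	int err;

	/* Honour the ICC request only as far as the debug window allows. */
	rate = clamp(icc_min, dbg_min, dbg_max);

	err = clk_set_rate(emc->clk, rate);
	if (err)
		return err;

	emc->requested_rate[EMC_RATE_DEBUG].min_rate = dbg_min;
	emc->requested_rate[EMC_RATE_DEBUG].max_rate = dbg_max;

	return 0;
}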
On 17/12/2025 20:29, Aaron Kling wrote:
> On Wed, Dec 17, 2025 at 12:59 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>>
>>
>> On 17/12/2025 18:39, Aaron Kling wrote:
>>
>> ...
>>
>>> To try to move a resolution along, let me try to enumerate the issues
>>> again. Again, please clarify should I have something incorrect or
>>> incomplete.
>>>
>>> 1) The primary issue is when an old dtb is used with this commit and
>>> the pcie driver is loaded. I can reproduce this issue on t186 and
>>> t194. If this becomes the sole remaining blocking issue, I would like
>>> for an exception to the normal rule be considered and this merged
>>> anyways. Since it does not cause a boot failure and distros package a
>>> new dt normally anyways. And to my knowledge, working around this
>>> would involve redoing part off the icc subsystem itself, a major task
>>> in comparison.
>>>
>>> 2) T194 is reported to have low clocks even with a new dt on the
>>> Nvidia regression bench. I cannot reproduce this, even with the pcie
>>> driver loaded. Can this be re-verified, please? And if it still
>>> happens, can logs from the failure be made available and/or more
>>> information provided as to the state of the unit? Like changes to the
>>> default defconfig, modules that get loaded, etc.
>>
>> Can you list all the patches that need to be applied on top of the
>> current -next and I will run it through our testing to make sure I have
>> this correct.
>
> This series, message id:
> 20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com. And the dt
> series, message id:
> 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. So, my build
> sequence is:
>
> git checkout next-20251217
> b4 shazam 20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com
> b4 shazam 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com

Thanks I added all these on top of next-20251216 (as that is the latest
I have tested) and Tegra194 fails to boot. We always include all the
modules in the rootfs that is being tested. You can see the boot log
here [0]. We are using an NFS rootfs for testing and I see a message
related to the NFS server not responding. I am guessing something is
running too slow again because the only thing I changed was adding your
patches. The test harness reports it is timing out ...

FAILED: Linux Boot Test 1
Test Owner(s): N/A
Execution Time 219.31 sec
Test TIMEOUT reached. Test did not report results in 120 secs
Percent passed so far: 0.0

>>> 3) Setting the max clock via debugfs fails when icc has pushed the
>>> current clock higher than the requested rate. This is a logic issue
>>> with all tegra emc drivers that implement dfs via icc. The suggested
>>> resolutions are to leave this as is to keep consistency with the
>>> existing drivers, perhaps updating all later, or to update the
>>> existing implementations in a separate series, then send a new
>>> revision here to match. I am personally unable to verify anything
>>> older than tegra124, however.
>>
>> Thierry and I chatted about this last week and we feel that debugfs
>> should be able to override the current configuration. So this will need
>> to be addressed as well.
>
> Alright. I will start looking at getting that logic straight, then
> upload a new series for the older archs and a new revision of this.

And just to confirm the test that sets the EMC frequency via the debugfs
also still fails.

Jon

[0] https://pastebin.com/5ghbSsu7

--
nvpublic
On Wed, Dec 17, 2025 at 3:53 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>
>
> On 17/12/2025 20:29, Aaron Kling wrote:
> > On Wed, Dec 17, 2025 at 12:59 PM Jon Hunter <jonathanh@nvidia.com> wrote:
> >>
> >>
> >> On 17/12/2025 18:39, Aaron Kling wrote:
> >>
> >> ...
> >>
> >>> To try to move a resolution along, let me try to enumerate the issues
> >>> again. Again, please clarify should I have something incorrect or
> >>> incomplete.
> >>>
> >>> 1) The primary issue is when an old dtb is used with this commit and
> >>> the pcie driver is loaded. I can reproduce this issue on t186 and
> >>> t194. If this becomes the sole remaining blocking issue, I would like
> >>> for an exception to the normal rule be considered and this merged
> >>> anyways. Since it does not cause a boot failure and distros package a
> >>> new dt normally anyways. And to my knowledge, working around this
> >>> would involve redoing part off the icc subsystem itself, a major task
> >>> in comparison.
> >>>
> >>> 2) T194 is reported to have low clocks even with a new dt on the
> >>> Nvidia regression bench. I cannot reproduce this, even with the pcie
> >>> driver loaded. Can this be re-verified, please? And if it still
> >>> happens, can logs from the failure be made available and/or more
> >>> information provided as to the state of the unit? Like changes to the
> >>> default defconfig, modules that get loaded, etc.
> >>
> >> Can you list all the patches that need to be applied on top of the
> >> current -next and I will run it through our testing to make sure I have
> >> this correct.
> >
> > This series, message id:
> > 20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com. And the dt
> > series, message id:
> > 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. So, my build
> > sequence is:
> >
> > git checkout next-20251217
> > b4 shazam 20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com
> > b4 shazam 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com
>
> Thanks I added all these on top of next-20251216 (as that is the latest
> I have tested) and Tegra194 fails to boot. We always include all the
> modules in the rootfs that is being tested. You can see the boot log
> here [0]. We are using an NFS rootfs for testing and I see a message
> related to the NFS server not responding. I am guessing something is
> running too slow again because the only thing I changed was adding your
> patches. The test harness reports it is timing out ...
>
> FAILED: Linux Boot Test 1
> Test Owner(s): N/A
> Execution Time 219.31 sec
> Test TIMEOUT reached. Test did not report results in 120 secs
> Percent passed so far: 0.0

Okay, so. Modules are in the rootfs, none get copied to the initramfs?
And the rootfs is on nfs? And for this failure, nfs never gets
mounted. So... for this case, no modules get loaded, implying that
whatever is happening is happening with the built-in drivers. Which
means this case isn't pcie related. Are there any modifications to the
defconfig? It appears that there must be, to have dwc-eth-dwmac
available. I will see if I can trigger anything when using ethernet.

If this does eventually boot to a rootfs, as implied by the comments
about debugs below, can you check to see what emc clock speed is after
boot?

> >>> 3) Setting the max clock via debugfs fails when icc has pushed the
> >>> current clock higher than the requested rate. This is a logic issue
> >>> with all tegra emc drivers that implement dfs via icc. The suggested
> >>> resolutions are to leave this as is to keep consistency with the
> >>> existing drivers, perhaps updating all later, or to update the
> >>> existing implementations in a separate series, then send a new
> >>> revision here to match. I am personally unable to verify anything
> >>> older than tegra124, however.
> >>
> >> Thierry and I chatted about this last week and we feel that debugfs
> >> should be able to override the current configuration. So this will need
> >> to be addressed as well.
> >
> > Alright. I will start looking at getting that logic straight, then
> > upload a new series for the older archs and a new revision of this.
>
> And just to confirm the test that sets the EMC frequency via the debugfs
> also still fails.
>
> Jon
>
> [0] https://pastebin.com/5ghbSsu7
>
> --
> nvpublic
>

Aaron
On 17/12/2025 22:44, Aaron Kling wrote:
...
>> Thanks I added all these on top of next-20251216 (as that is the latest
>> I have tested) and Tegra194 fails to boot. We always include all the
>> modules in the rootfs that is being tested. You can see the boot log
>> here [0]. We are using an NFS rootfs for testing and I see a message
>> related to the NFS server not responding. I am guessing something is
>> running too slow again because the only thing I changed was adding your
>> patches. The test harness reports it is timing out ...
>>
>> FAILED: Linux Boot Test 1
>> Test Owner(s): N/A
>> Execution Time 219.31 sec
>> Test TIMEOUT reached. Test did not report results in 120 secs
>> Percent passed so far: 0.0
>
> Okay, so. Modules are in the rootfs, none get copied to the initramfs?
> And the rootfs is on nfs? And for this failure, nfs never gets
> mounted. So... for this case, no modules get loaded, implying that
> whatever is happening is happening with the built-in drivers. Which
> means this case isn't pcie related. Are there any modifications to the
> defconfig? It appears that there must be, to have dwc-eth-dwmac
> available. I will see if I can trigger anything when using ethernet.
If you look at the boot log you will see ...
[ 7.839012] Root device found: nfs
[ 7.908307] Ethernet interface: eth0
[ 7.929765] IP Address: 192.168.99.2
[ 8.173978] Rootfs mounted over nfs
[ 8.306291] Switching from initrd to actual rootfs
So it does mount the rootfs and so the modules would be loaded. I
believe that PCIe is definitely loaded because that is what I observed
before. And yes there are a few modifications to the defconfig that we
make on top (that have been added over the years for various reasons) ...
CONFIG_ARM64_PMEM=y
CONFIG_BROADCOM_PHY=y
CONFIG_DWMAC_DWC_QOS_ETH=y
CONFIG_EEPROM_AT24=m
CONFIG_EXTRA_FIRMWARE="nvidia/tegra210/xusb.bin nvidia/tegra186/xusb.bin
nvidia/tegra194/xusb.bin rtl_nic/rtl8153a-3.fw rtl_nic/rtl8168h-2.fw"
CONFIG_EXTRA_FIRMWARE_DIR="${KERNEL_FW_DIR}"
CONFIG_MARVELL_PHY=y
CONFIG_R8169=y
CONFIG_RANDOMIZE_BASE=n
CONFIG_SERIAL_TEGRA_TCU=y
CONFIG_SERIAL_TEGRA_TCU_CONSOLE=y
CONFIG_STAGING=y
CONFIG_STAGING_MEDIA=y
CONFIG_STMMAC_ETH=y
CONFIG_STMMAC_PLATFORM=y
CONFIG_USB_RTL8152=y
CONFIG_VIDEO_TEGRA=m
CONFIG_VIDEO_TEGRA_TPG=y
CONFIG_DWMAC_TEGRA=y
Looking at the boot log I see ...
[ 3.854658] cpu cpu0: cpufreq_init: failed to get clk: -2
[ 3.854927] cpu cpu0: cpufreq_init: failed to get clk: -2
[ 3.855218] cpu cpu2: cpufreq_init: failed to get clk: -2
[ 3.858438] cpu cpu2: cpufreq_init: failed to get clk: -2
[ 3.863987] cpu cpu4: cpufreq_init: failed to get clk: -2
[ 3.869741] cpu cpu4: cpufreq_init: failed to get clk: -2
[ 3.875006] cpu cpu6: cpufreq_init: failed to get clk: -2
[ 3.880725] cpu cpu6: cpufreq_init: failed to get clk: -2
[ 3.886018] cpufreq-dt cpufreq-dt: failed register driver: -19
So actually, I am now wondering if this is the problem?
Jon
--
nvpublic
On Thu, Dec 18, 2025 at 5:12 AM Jon Hunter <jonathanh@nvidia.com> wrote:
>
>
> On 17/12/2025 22:44, Aaron Kling wrote:
>
> ...
>
> >> Thanks I added all these on top of next-20251216 (as that is the latest
> >> I have tested) and Tegra194 fails to boot. We always include all the
> >> modules in the rootfs that is being tested. You can see the boot log
> >> here [0]. We are using an NFS rootfs for testing and I see a message
> >> related to the NFS server not responding. I am guessing something is
> >> running too slow again because the only thing I changed was adding your
> >> patches. The test harness reports it is timing out ...
> >>
> >> FAILED: Linux Boot Test 1
> >> Test Owner(s): N/A
> >> Execution Time 219.31 sec
> >> Test TIMEOUT reached. Test did not report results in 120 secs
> >> Percent passed so far: 0.0
> >
> > Okay, so. Modules are in the rootfs, none get copied to the initramfs?
> > And the rootfs is on nfs? And for this failure, nfs never gets
> > mounted. So... for this case, no modules get loaded, implying that
> > whatever is happening is happening with the built-in drivers. Which
> > means this case isn't pcie related. Are there any modifications to the
> > defconfig? It appears that there must be, to have dwc-eth-dwmac
> > available. I will see if I can trigger anything when using ethernet.
>
> If you look at the boot log you will see ...
>
> [ 7.839012] Root device found: nfs
> [ 7.908307] Ethernet interface: eth0
> [ 7.929765] IP Address: 192.168.99.2
> [ 8.173978] Rootfs mounted over nfs
> [ 8.306291] Switching from initrd to actual rootfs
>
> So it does mount the rootfs and so the modules would be loaded. I
But the bottom of the log says:
[ 188.360095] nfs: server 192.168.99.1 not responding, still trying
So does it mount nfs and load modules, and *then* fail to talk to the
nfs server? That doesn't make any sense. And I don't see any logs from
driver probes after the rootfs line. And there's sync_state lines
stating that pcie among others isn't available.
> believe that PCIe is definitely loaded because that is what I observed
> before. And yes there are a few modifications to the defconfig that we
> make on top (that have been added over the years for various reasons) ...
>
> CONFIG_ARM64_PMEM=y
> CONFIG_BROADCOM_PHY=y
> CONFIG_DWMAC_DWC_QOS_ETH=y
> CONFIG_EEPROM_AT24=m
> CONFIG_EXTRA_FIRMWARE="nvidia/tegra210/xusb.bin nvidia/tegra186/xusb.bin
> nvidia/tegra194/xusb.bin rtl_nic/rtl8153a-3.fw rtl_nic/rtl8168h-2.fw"
> CONFIG_EXTRA_FIRMWARE_DIR="${KERNEL_FW_DIR}"
> CONFIG_MARVELL_PHY=y
> CONFIG_R8169=y
> CONFIG_RANDOMIZE_BASE=n
> CONFIG_SERIAL_TEGRA_TCU=y
> CONFIG_SERIAL_TEGRA_TCU_CONSOLE=y
> CONFIG_STAGING=y
> CONFIG_STAGING_MEDIA=y
> CONFIG_STMMAC_ETH=y
> CONFIG_STMMAC_PLATFORM=y
> CONFIG_USB_RTL8152=y
> CONFIG_VIDEO_TEGRA=m
> CONFIG_VIDEO_TEGRA_TPG=y
> CONFIG_DWMAC_TEGRA=y
I will incorporate these to a build and see if I get any different results.
> Looking at the boot log I see ...
>
> [ 3.854658] cpu cpu0: cpufreq_init: failed to get clk: -2
> [ 3.854927] cpu cpu0: cpufreq_init: failed to get clk: -2
> [ 3.855218] cpu cpu2: cpufreq_init: failed to get clk: -2
> [ 3.858438] cpu cpu2: cpufreq_init: failed to get clk: -2
> [ 3.863987] cpu cpu4: cpufreq_init: failed to get clk: -2
> [ 3.869741] cpu cpu4: cpufreq_init: failed to get clk: -2
> [ 3.875006] cpu cpu6: cpufreq_init: failed to get clk: -2
> [ 3.880725] cpu cpu6: cpufreq_init: failed to get clk: -2
> [ 3.886018] cpufreq-dt cpufreq-dt: failed register driver: -19
>
> So actually, I am now wondering if this is the problem?
These lines are from cpufreq-dt trying to manage the cpu's directly,
which it's not supposed to do. tegra194-cpufreq is supposed to manage
them. I see these lines as well, when things are operating as
expected. The real driver doesn't log anything, but the policies are
visible in sysfs. I did a little bit of digging previously to see if I
could remove the log churn, but was unable to do so. I would have to
double check to be completely sure, but I am fairly certain I saw
these lines before my changes as well. It's something that would be
good to get fixed, but I don't think it's operable here.
Aaron
On Thu, Dec 18, 2025 at 1:25 PM Aaron Kling <webgeek1234@gmail.com> wrote:
>
> On Thu, Dec 18, 2025 at 5:12 AM Jon Hunter <jonathanh@nvidia.com> wrote:
> >
> >
> > On 17/12/2025 22:44, Aaron Kling wrote:
> >
> > ...
> >
> > >> Thanks I added all these on top of next-20251216 (as that is the latest
> > >> I have tested) and Tegra194 fails to boot. We always include all the
> > >> modules in the rootfs that is being tested. You can see the boot log
> > >> here [0]. We are using an NFS rootfs for testing and I see a message
> > >> related to the NFS server not responding. I am guessing something is
> > >> running too slow again because the only thing I changed was adding your
> > >> patches. The test harness reports it is timing out ...
> > >>
> > >> FAILED: Linux Boot Test 1
> > >> Test Owner(s): N/A
> > >> Execution Time 219.31 sec
> > >> Test TIMEOUT reached. Test did not report results in 120 secs
> > >> Percent passed so far: 0.0
> > >
> > > Okay, so. Modules are in the rootfs, none get copied to the initramfs?
> > > And the rootfs is on nfs? And for this failure, nfs never gets
> > > mounted. So... for this case, no modules get loaded, implying that
> > > whatever is happening is happening with the built-in drivers. Which
> > > means this case isn't pcie related. Are there any modifications to the
> > > defconfig? It appears that there must be, to have dwc-eth-dwmac
> > > available. I will see if I can trigger anything when using ethernet.
> >
> > If you look at the boot log you will see ...
> >
> > [ 7.839012] Root device found: nfs
> > [ 7.908307] Ethernet interface: eth0
> > [ 7.929765] IP Address: 192.168.99.2
> > [ 8.173978] Rootfs mounted over nfs
> > [ 8.306291] Switching from initrd to actual rootfs
> >
> > So it does mount the rootfs and so the modules would be loaded. I
>
> But the bottom of the log says:
> [ 188.360095] nfs: server 192.168.99.1 not responding, still trying
>
> So does it mount nfs and load modules, and *then* fail to talk to the
> nfs server? That doesn't make any sense. And I don't see any logs from
> driver probes after the rootfs line. And there's sync_state lines
> stating that pcie among others isn't available.
>
> > believe that PCIe is definitely loaded because that is what I observed
> > before. And yes there are a few modifications to the defconfig that we
> > make on top (that have been added over the years for various reasons) ...
> >
> > CONFIG_ARM64_PMEM=y
> > CONFIG_BROADCOM_PHY=y
> > CONFIG_DWMAC_DWC_QOS_ETH=y
> > CONFIG_EEPROM_AT24=m
> > CONFIG_EXTRA_FIRMWARE="nvidia/tegra210/xusb.bin nvidia/tegra186/xusb.bin
> > nvidia/tegra194/xusb.bin rtl_nic/rtl8153a-3.fw rtl_nic/rtl8168h-2.fw"
> > CONFIG_EXTRA_FIRMWARE_DIR="${KERNEL_FW_DIR}"
> > CONFIG_MARVELL_PHY=y
> > CONFIG_R8169=y
> > CONFIG_RANDOMIZE_BASE=n
> > CONFIG_SERIAL_TEGRA_TCU=y
> > CONFIG_SERIAL_TEGRA_TCU_CONSOLE=y
> > CONFIG_STAGING=y
> > CONFIG_STAGING_MEDIA=y
> > CONFIG_STMMAC_ETH=y
> > CONFIG_STMMAC_PLATFORM=y
> > CONFIG_USB_RTL8152=y
> > CONFIG_VIDEO_TEGRA=m
> > CONFIG_VIDEO_TEGRA_TPG=y
> > CONFIG_DWMAC_TEGRA=y
>
> I will incorporate these to a build and see if I get any different results.
>
> > Looking at the boot log I see ...
> >
> > [ 3.854658] cpu cpu0: cpufreq_init: failed to get clk: -2
> > [ 3.854927] cpu cpu0: cpufreq_init: failed to get clk: -2
> > [ 3.855218] cpu cpu2: cpufreq_init: failed to get clk: -2
> > [ 3.858438] cpu cpu2: cpufreq_init: failed to get clk: -2
> > [ 3.863987] cpu cpu4: cpufreq_init: failed to get clk: -2
> > [ 3.869741] cpu cpu4: cpufreq_init: failed to get clk: -2
> > [ 3.875006] cpu cpu6: cpufreq_init: failed to get clk: -2
> > [ 3.880725] cpu cpu6: cpufreq_init: failed to get clk: -2
> > [ 3.886018] cpufreq-dt cpufreq-dt: failed register driver: -19
> >
> > So actually, I am now wondering if this is the problem?
>
> These lines are from cpufreq-dt trying to manage the cpu's directly,
> which it's not supposed to do. tegra194-cpufreq is supposed to manage
> them. I see these lines as well, when things are operating as
> expected. The real driver doesn't log anything, but the policies are
> visible in sysfs. I did a little bit of digging previously to see if I
> could remove the log churn, but was unable to do so. I would have to
> double check to be completely sure, but I am fairly certain I saw
> these lines before my changes as well. It's something that would be
> good to get fixed, but I don't think it's operable here.
Turns out, this is actually semi-operable. There's a blocklist in the
cpufreq-dt driver that includes all tegra archs <= t234 except for
t186 and t194. If I add t194 to that list, then the log lines go away.
However, it does not fix the nfs boot issue. I was finally able to
replicate it by setting up my own nfs rootfs. This series does not
affect it though, fwiw, it's the dt series that triggers this. Before
it, nfsroot boots as expected. After it, the reported issue happens.
After adding t194 to the cpufreq-dt blocklist, the issue still
happens. But... if I add "blacklist=cpufreq-dt" to the kernel
bootargs, nfs works again. I don't get this.
So, summary:
* Adding opp tables to the cpu nodes causes cpufreq-dt to try to
handle cpufreq for the soc
* Adding tegra194 to the cpufreq-dt-platdev blocklist stops log
messages about the attempt
* However, it still affects the ethernet driver, causing watchdog
timeouts and adapter resets
* Blacklisting the cpufreq-dt driver entirely prevents the issue
I'm not sure what to make of this. Anyone have thoughts? I will send a
patch separately to add t186 and t194 to the cpufreq-dt-platdev block
list as this needs to happen in any case.
Aaron
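[For reference, the cpufreq-dt-platdev change Aaron refers to above is a
small addition to the blocklist table in
drivers/cpufreq/cpufreq-dt-platdev.c. A rough, untested sketch of where the
entries would go; the surrounding entries are abbreviated:]

static const struct of_device_id blocklist[] __initconst = {
	/* ... existing entries ... */
	{ .compatible = "nvidia,tegra124", },
	{ .compatible = "nvidia,tegra186", },	/* new */
	{ .compatible = "nvidia,tegra194", },	/* new */
	{ .compatible = "nvidia,tegra210", },
	{ .compatible = "nvidia,tegra234", },
	/* ... */
	{ }
};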
On 18/12/2025 21:20, Aaron Kling wrote:

...

> Turns out, this is actually semi-operable. There's a blocklist in the
> cpufreq-dt driver that includes all tegra archs <= t234 except for
> t186 and t194. If I add t194 to that list, then the log lines go away.
> However, it does not fix the nfs boot issue. I was finally able to
> replicate it by setting up my own nfs rootfs. This series does not
> affect it though, fwiw, it's the dt series that triggers this. Before
> it, nfsroot boots as expected. After it, the reported issue happens.
> After adding t194 to the cpufreq-dt blocklist, the issue still
> happens. But... if I add "blacklist=cpufreq-dt" to the kernel
> bootargs, nfs works again. I don't get this.
>
> So, summary:
> * Adding opp tables to the cpu nodes causes cpufreq-dt to try to
> handle cpufreq for the soc
> * Adding tegra194 to the cpufreq-dt-platdev blocklist stops log
> messages about the attempt
> * However, it still affects the ethernet driver, causing watchdog
> timeouts and adapter resets
> * Blacklisting the cpufreq-dt driver entirely prevents the issue
>
> I'm not sure what to make of this. Anyone have thoughts? I will send a
> patch separately to add t186 and t194 to the cpufreq-dt-platdev block
> list as this needs to happen in any case.

Great glad you see the same and thanks for the summary.

Have you looked at what the CPU and EMC frequencies are doing? I still
don't understand the connection to the ethernet driver.

Have you tried setting the performance governor for CPUFREQ to see if
that works? That would tell us if the CPU speed is related.

Jon

--
nvpublic
On 12/11/2025 08:21, Aaron Kling wrote:
> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote:
>>
>>
>> On 11/11/2025 23:17, Aaron Kling wrote:
>>
>> ...
>>
>>> Alright, I think I've got the picture of what's going on now. The
>>> standard arm64 defconfig enables the t194 pcie driver as a module. And
>>> my simple busybox ramdisk that I use for mainline regression testing
>>> isn't loading any modules. If I set the pcie driver to built-in, I
>>> replicate the issue. And I don't see the issue on my normal use case,
>>> because I have the dt changes as well.
>>>
>>> So it appears that the pcie driver submits icc bandwidth. And without
>>> cpufreq submitting bandwidth as well, the emc driver gets a very low
>>> number and thus sets a very low emc freq. The question becomes... what
>>> to do about it? If the related dt changes were submitted to
>>> linux-next, everything should fall into place. And I'm not sure where
>>> this falls on the severity scale since it doesn't full out break boot
>>> or prevent operation.
>>
>> Where are the related DT changes? If we can get these into -next and
>> lined up to be merged for v6.19, then that is fine. However, we should
>> not merge this for v6.19 without the DT changes.
>
> The dt changes are here [0].
>
> This was all part of the same series, keeping everything logically
> related together. But on v2, Krzysztof said that none of this should

I asked you about dependencies between the patches and you said there
are none, so collecting different subsystems into one is wrong. That's
nothing new, standard Linux kernel process.

What is non-standard here is keeping secret that there is impact on
users.

> have ever been together and that each subsystem should get a separate
> series, even if the changes are related. Which I did, and now this is
> split across three series. The actmon series for tegra210 is in a
> similar state. Split across four series and only one has been pulled
> to linux-next.
>
>> I will also talk with Thierry to see if he has any concerns about users
>> seeing slow performance if they don't have an up-to-date DTB.
>>
>> Is there any easy way to detect if the DTB has he necessary properties
>> to enable ICC scaling?
>
> I'm not sure there is any simple way, given how I set up tegra186 and
> tegra194. The new dt properties are on the cpu nodes, there's nothing
> new for the emc node. So the emc driver just unconditionally declares
> itself to icc. It was doing this before too, but wouldn't do anything
> on tegra186 or tegra194 because the set_bw function was just a stub
> and the real logic happened in the bpmp bw mgr, which only exists on
> tegra234+. Now the set_bw function will directly calculate and set the
> emc clock as long as the bpmp bw mgr is not supported. Offhand, I
> can't think of anything existing to check to skip this, because
> nothing new in the dt has been added in the scope of emc.

If your ICC triggers without users, I think it is usual case - you
should not enable the sync_state but instead keep it disabled till you
have all the consumers in place.

Best regards,
Krzysztof
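[For context on the sync_state suggestion: as I read the ICC core, a
provider opts in to sync_state handling by pointing its driver's sync_state
callback at icc_sync_state(); until that callback has fired for all
providers, nodes that declare an initial bandwidth keep it as a floor, so
missing consumers cannot drag the bus down. A rough sketch of that
registration, with hypothetical driver names, not taken from this series:]

#include <linux/interconnect-provider.h>
#include <linux/platform_device.h>

static int example_emc_probe(struct platform_device *pdev)
{
	/* icc_provider_init()/icc_provider_register() would go here */
	return 0;
}

static struct platform_driver example_emc_driver = {
	.probe = example_emc_probe,
	.driver = {
		.name = "example-emc",
		/* leave unset, or set conditionally, to keep the floor */
		.sync_state = icc_sync_state,
	},
};
builtin_platform_driver(example_emc_driver);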