From: Aaron Kling <webgeek1234@gmail.com>
This adds support for dynamic frequency scaling of external memory on
devices with bpmp firmware that does not support bwmgr.
Signed-off-by: Aaron Kling <webgeek1234@gmail.com>
---
drivers/memory/tegra/tegra186-emc.c | 132 +++++++++++++++++++++++++++++++++++-
1 file changed, 130 insertions(+), 2 deletions(-)
diff --git a/drivers/memory/tegra/tegra186-emc.c b/drivers/memory/tegra/tegra186-emc.c
index 9959ad5804b444b269456d1fbae87b4bc111661b..74be09968baa7a0fbdce4359f470ce56b18acb10 100644
--- a/drivers/memory/tegra/tegra186-emc.c
+++ b/drivers/memory/tegra/tegra186-emc.c
@@ -18,6 +18,17 @@ struct tegra186_emc_dvfs {
unsigned long rate;
};
+enum emc_rate_request_type {
+ EMC_RATE_DEBUG,
+ EMC_RATE_ICC,
+ EMC_RATE_TYPE_MAX,
+};
+
+struct emc_rate_request {
+ unsigned long min_rate;
+ unsigned long max_rate;
+};
+
struct tegra186_emc {
struct tegra_bpmp *bpmp;
struct device *dev;
@@ -33,8 +44,90 @@ struct tegra186_emc {
} debugfs;
struct icc_provider provider;
+
+ /*
+ * There are multiple sources in the EMC driver which could request
+ * a min/max clock rate; these rates are contained in this array.
+ */
+ struct emc_rate_request requested_rate[EMC_RATE_TYPE_MAX];
+
+ /* protect shared rate-change code path */
+ struct mutex rate_lock;
};
+static void tegra186_emc_rate_requests_init(struct tegra186_emc *emc)
+{
+ unsigned int i;
+
+ for (i = 0; i < EMC_RATE_TYPE_MAX; i++) {
+ emc->requested_rate[i].min_rate = 0;
+ emc->requested_rate[i].max_rate = ULONG_MAX;
+ }
+}
+
+static int emc_request_rate(struct tegra186_emc *emc,
+ unsigned long new_min_rate,
+ unsigned long new_max_rate,
+ enum emc_rate_request_type type)
+{
+ struct emc_rate_request *req = emc->requested_rate;
+ unsigned long min_rate = 0, max_rate = ULONG_MAX;
+ unsigned int i;
+ int err;
+
+ /* select minimum and maximum rates among the requested rates */
+ for (i = 0; i < EMC_RATE_TYPE_MAX; i++, req++) {
+ if (i == type) {
+ min_rate = max(new_min_rate, min_rate);
+ max_rate = min(new_max_rate, max_rate);
+ } else {
+ min_rate = max(req->min_rate, min_rate);
+ max_rate = min(req->max_rate, max_rate);
+ }
+ }
+
+ if (min_rate > max_rate) {
+ dev_err_ratelimited(emc->dev, "%s: type %u: out of range: %lu %lu\n",
+ __func__, type, min_rate, max_rate);
+ return -ERANGE;
+ }
+
+ err = clk_set_rate(emc->clk, min_rate);
+ if (err)
+ return err;
+
+ emc->requested_rate[type].min_rate = new_min_rate;
+ emc->requested_rate[type].max_rate = new_max_rate;
+
+ return 0;
+}
+
+static int emc_set_min_rate(struct tegra186_emc *emc, unsigned long rate,
+ enum emc_rate_request_type type)
+{
+ struct emc_rate_request *req = &emc->requested_rate[type];
+ int ret;
+
+ mutex_lock(&emc->rate_lock);
+ ret = emc_request_rate(emc, rate, req->max_rate, type);
+ mutex_unlock(&emc->rate_lock);
+
+ return ret;
+}
+
+static int emc_set_max_rate(struct tegra186_emc *emc, unsigned long rate,
+ enum emc_rate_request_type type)
+{
+ struct emc_rate_request *req = &emc->requested_rate[type];
+ int ret;
+
+ mutex_lock(&emc->rate_lock);
+ ret = emc_request_rate(emc, req->min_rate, rate, type);
+ mutex_unlock(&emc->rate_lock);
+
+ return ret;
+}
+
/*
* debugfs interface
*
@@ -107,7 +200,7 @@ static int tegra186_emc_debug_min_rate_set(void *data, u64 rate)
if (!tegra186_emc_validate_rate(emc, rate))
return -EINVAL;
- err = clk_set_min_rate(emc->clk, rate);
+ err = emc_set_min_rate(emc, rate, EMC_RATE_DEBUG);
if (err < 0)
return err;
@@ -137,7 +230,7 @@ static int tegra186_emc_debug_max_rate_set(void *data, u64 rate)
if (!tegra186_emc_validate_rate(emc, rate))
return -EINVAL;
- err = clk_set_max_rate(emc->clk, rate);
+ err = emc_set_max_rate(emc, rate, EMC_RATE_DEBUG);
if (err < 0)
return err;
@@ -217,6 +310,12 @@ static int tegra186_emc_get_emc_dvfs_latency(struct tegra186_emc *emc)
return 0;
}
+static inline struct tegra186_emc *
+to_tegra186_emc_provider(struct icc_provider *provider)
+{
+ return container_of(provider, struct tegra186_emc, provider);
+}
+
/*
* tegra186_emc_icc_set_bw() - Set BW api for EMC provider
* @src: ICC node for External Memory Controller (EMC)
@@ -227,6 +326,33 @@ static int tegra186_emc_get_emc_dvfs_latency(struct tegra186_emc *emc)
*/
static int tegra186_emc_icc_set_bw(struct icc_node *src, struct icc_node *dst)
{
+ struct tegra186_emc *emc = to_tegra186_emc_provider(dst->provider);
+ struct tegra_mc *mc = dev_get_drvdata(emc->dev->parent);
+ unsigned long long peak_bw = icc_units_to_bps(dst->peak_bw);
+ unsigned long long avg_bw = icc_units_to_bps(dst->avg_bw);
+ unsigned long long rate = max(avg_bw, peak_bw);
+ const unsigned int ddr = 2;
+ int err;
+
+ /*
+ * Do nothing here if bwmgr is supported in BPMP-FW; in that case BPMP-FW
+ * sets the final frequency based on the passed values.
+ */
+ if (mc->bwmgr_mrq_supported)
+ return 0;
+
+ /*
+ * The Tegra186 EMC runs at the SDRAM bus clock rate. This means the
+ * EMC clock rate is half the peak data rate because data is sampled
+ * on both EMC clock edges.
+ */
+ do_div(rate, ddr);
+ rate = min_t(u64, rate, U32_MAX);
+
+ err = emc_set_min_rate(emc, rate, EMC_RATE_ICC);
+ if (err)
+ return err;
+
return 0;
}
@@ -329,6 +455,8 @@ static int tegra186_emc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, emc);
emc->dev = &pdev->dev;
+ tegra186_emc_rate_requests_init(emc);
+
if (tegra_bpmp_mrq_is_supported(emc->bpmp, MRQ_EMC_DVFS_LATENCY)) {
err = tegra186_emc_get_emc_dvfs_latency(emc);
if (err)
--
2.51.0
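As a quick illustration of how the per-type requests above are meant to combine, here is a minimal standalone sketch with made-up values. It mirrors the aggregation in emc_request_rate() from the patch but is not the driver code itself:

#include <limits.h>
#include <stdio.h>

#define EMC_RATE_TYPE_MAX 2	/* EMC_RATE_DEBUG, EMC_RATE_ICC */

struct emc_rate_request {
	unsigned long min_rate;
	unsigned long max_rate;
};

int main(void)
{
	/* Hypothetical outstanding requests: ICC asks for a 250 MHz floor,
	 * debugfs has not constrained anything yet.
	 */
	struct emc_rate_request req[EMC_RATE_TYPE_MAX] = {
		{ .min_rate = 0,		.max_rate = ULONG_MAX },	/* DEBUG */
		{ .min_rate = 250000000UL,	.max_rate = ULONG_MAX },	/* ICC */
	};
	unsigned long min_rate = 0, max_rate = ULONG_MAX;
	int i;

	for (i = 0; i < EMC_RATE_TYPE_MAX; i++) {
		if (req[i].min_rate > min_rate)
			min_rate = req[i].min_rate;
		if (req[i].max_rate < max_rate)
			max_rate = req[i].max_rate;
	}

	if (min_rate > max_rate) {
		fprintf(stderr, "conflicting requests -> -ERANGE\n");
		return 1;
	}

	/* The driver would now call clk_set_rate(emc->clk, min_rate). */
	printf("EMC floor: %lu Hz\n", min_rate);	/* 250000000 */
	return 0;
}
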
On 27/10/2025 18:55, Aaron Kling via B4 Relay wrote:
> From: Aaron Kling <webgeek1234@gmail.com>
>
> This adds support for dynamic frequency scaling of external memory on
> devices with bpmp firmware that does not support bwmgr.

...

FYI, this patch is causing a boot regression on Tegra194 devices. I
noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
no longer booting and bisect is pointing to this. I will have a closer
look and try to see why this is.
Jon
--
nvpublic
On Mon, Nov 10, 2025 at 3:25 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>
>
> On 27/10/2025 18:55, Aaron Kling via B4 Relay wrote:
> > From: Aaron Kling <webgeek1234@gmail.com>
> >
> > This adds support for dynamic frequency scaling of external memory on
> > devices with bpmp firmware that does not support bwmgr.

...

>
> FYI, this patch is causing a boot regression on Tegra194 devices. I
> noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
> no longer booting and bisect is pointing to this. I will have a closer
> look and try to see why this is.
Interesting. Both were booting for me during my verification, though
my use case involves the dt changes that I don't believe have been
picked up yet. Thought I had explicitly verified without the dt
changes too, though. Since I was asked to do so on this or one of the
other similar series. I will try to check linux-next as-is soon.
Aaron
On Mon, Nov 10, 2025 at 3:55 PM Aaron Kling <webgeek1234@gmail.com> wrote:
>
> On Mon, Nov 10, 2025 at 3:25 PM Jon Hunter <jonathanh@nvidia.com> wrote:
> >
> >
> > On 27/10/2025 18:55, Aaron Kling via B4 Relay wrote:
> > > From: Aaron Kling <webgeek1234@gmail.com>
> > >
> > > This adds support for dynamic frequency scaling of external memory on
> > > devices with bpmp firmware that does not support bwmgr.

...

> >
> > FYI, this patch is causing a boot regression on Tegra194 devices. I
> > noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
> > no longer booting and bisect is pointing to this. I will have a closer
> > look and try to see why this is.
>
> Interesting. Both were booting for me during my verification, though
> my use case involves the dt changes that I don't believe have been
> picked up yet. Thought I had explicitly verified without the dt
> changes too, though. Since I was asked to do so on this or one of the
> other similar series. I will try to check linux-next as-is soon.
I just built next-20251110 using the standard arm64 defconfig and
flashed the resulting Image and dtb's to p2972 and p3518 (p3509+p3668)
and both booted to cli on a barebones busybox ramdisk. I do not see
any errors from tegra-mc, and the only error I see from tegra186-emc
is that it can't find the opp tables, which is expected without the dt
changes, and is not fatal.
Aaron
On 11/11/2025 01:39, Aaron Kling wrote:
> On Mon, Nov 10, 2025 at 3:55 PM Aaron Kling <webgeek1234@gmail.com> wrote:
>>
>> On Mon, Nov 10, 2025 at 3:25 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>>>
>>>
>>> On 27/10/2025 18:55, Aaron Kling via B4 Relay wrote:
>>>> From: Aaron Kling <webgeek1234@gmail.com>
>>>>
>>>> This adds support for dynamic frequency scaling of external memory on
>>>> devices with bpmp firmware that does not support bwmgr.

...

>>>
>>> FYI, this patch is causing a boot regression on Tegra194 devices. I
>>> noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
>>> no longer booting and bisect is pointing to this. I will have a closer
>>> look and try to see why this is.
>>
>> Interesting. Both were booting for me during my verification, though
>> my use case involves the dt changes that I don't believe have been
>> picked up yet. Thought I had explicitly verified without the dt
>> changes too, though. Since I was asked to do so on this or one of the
>> other similar series. I will try to check linux-next as-is soon.
>
> I just built next-20251110 using the standard arm64 defconfig and
> flashed the resulting Image and dtb's to p2972 and p3518 (p3509+p3668)
> and both booted to cli on a barebones busybox ramdisk. I do not see
> any errors from tegra-mc, and the only error I see from tegra186-emc
> is that it can't find the opp tables, which is expected without the dt
> changes, and is not fatal.
Thanks for testing. Something is not right because our boards are
failing. So maybe we are doing/testing something different. However,
this should not break. So there is a problem here.
Jon
--
nvpublic
On 11/11/2025 12:13, Jon Hunter wrote:
>>>>> +
>>>>> if (tegra_bpmp_mrq_is_supported(emc->bpmp, MRQ_EMC_DVFS_LATENCY)) {
>>>>> err = tegra186_emc_get_emc_dvfs_latency(emc);
>>>>> if (err)
>>>>>
>>>>
>>>>
>>>> FYI, this patch is causing a boot regression on Tegra194 devices. I
>>>> noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
>>>> no longer booting and bisect is pointing to this. I will have a closer
>>>> look and try to see why this is.
>>>
>>> Interesting. Both were booting for me during my verification, though
>>> my use case involves the dt changes that I don't believe have been
>>> picked up yet. Thought I had explicitly verified without the dt
>>> changes too, though. Since I was asked to do so on this or one of the
>>> other similar series. I will try to check linux-next as-is soon.
>>
>> I just built next-20251110 using the standard arm64 defconfig and
>> flashed the resulting Image and dtb's to p2972 and p3518 (p3509+p3668)
>> and both booted to cli on a barebones busybox ramdisk. I do not see
>> any errors from tegra-mc, and the only error I see from tegra186-emc
>> is that it can't find the opp tables, which is expected without the dt
>> changes, and is not fatal.
>
> Thanks for testing. Something is not right because our boards are
> failing. So may be we are doing/testing something different. However,
> this should not break. So there is a problem here.
Did you mean: "So there is NO problem here"?

I kept these for 10 days in linux-next and yesterday sent them in a pull
request. If some patches are needed on top, they can still fit the coming
merge window if sent soon.
Best regards,
Krzysztof
On 11/11/2025 11:16, Krzysztof Kozlowski wrote:
> On 11/11/2025 12:13, Jon Hunter wrote:
>>>>>> +
>>>>>> if (tegra_bpmp_mrq_is_supported(emc->bpmp, MRQ_EMC_DVFS_LATENCY)) {
>>>>>> err = tegra186_emc_get_emc_dvfs_latency(emc);
>>>>>> if (err)
>>>>>>
>>>>>
>>>>>
>>>>> FYI, this patch is causing a boot regression on Tegra194 devices. I
>>>>> noticed that tegra194-p2972-0000 and tegra194-p3509-0000+p3668-0000 are
>>>>> no longer booting and bisect is pointing to this. I will have a closer
>>>>> look and try to see why this is.
>>>>
>>>> Interesting. Both were booting for me during my verification, though
>>>> my use case involves the dt changes that I don't believe have been
>>>> picked up yet. Thought I had explicitly verified without the dt
>>>> changes too, though. Since I was asked to do so on this or one of the
>>>> other similar series. I will try to check linux-next as-is soon.
>>>
>>> I just built next-20251110 using the standard arm64 defconfig and
>>> flashed the resulting Image and dtb's to p2972 and p3518 (p3509+p3668)
>>> and both booted to cli on a barebones busybox ramdisk. I do not see
>>> any errors from tegra-mc, and the only error I see from tegra186-emc
>>> is that it can't find the opp tables, which is expected without the dt
>>> changes, and is not fatal.
>>
>> Thanks for testing. Something is not right because our boards are
>> failing. So may be we are doing/testing something different. However,
>> this should not break. So there is a problem here.
>
>
> Did you meant: "So there is NO problem here"?
Nope. I mean that this is a problem here.
> I kept these for 10 days in linux-next and yesterday sent them in pull
> request. If some patches are needed on top, they can still fit coming
> merge window if sent soon.
Looking back I see it started failing with next-20251103. next-20251031
was fine. Reverting this commit on top of next-20251110 fixes the issue.
There may be a difference in the firmware being used. Our testing is
based upon an older NVIDIA L4T r32.5.1 release but nonetheless, we
should not break that.
Jon
--
nvpublic
On 11/11/2025 12:05, Jon Hunter wrote:

...

>>> Thanks for testing. Something is not right because our boards are
>>> failing. So may be we are doing/testing something different. However,
>>> this should not break. So there is a problem here.
>>
>>
>> Did you meant: "So there is NO problem here"?
>
> Nope. I mean that this is a problem here.
>
>> I kept these for 10 days in linux-next and yesterday sent them in pull
>> request. If some patches are needed on top, they can still fit coming
>> merge window if sent soon.
>
> Looking back I see it started failing with next-20251103. next-20251031
> was fine. Reverting this commit on top of next-20251110 fixes the issue.
>
> There may be a difference in the firmware being used. Our testing is
> based upon an older NVIDIA L4T r32.5.1 release but nonetheless, we
> should not break that.

OK, so I see what is happening here. The boot test that we are running
has a 2 minute timeout and the board is now failing to boot within that
time.

Adding some debug prints, I can see that initially the EMC clock
frequency is 1600MHz and now, after this change, on boot the EMC clock
gets set to 250MHz. Hence, booting now takes significantly longer and
the test times out.

We definitely don't want to increase the timeout of the test. Any thoughts?

Jon

--
nvpublic
On Tue, Nov 11, 2025 at 8:35 AM Jon Hunter <jonathanh@nvidia.com> wrote:
>
> On 11/11/2025 12:05, Jon Hunter wrote:
>
> ...
>
> OK, so I see what is happening here. The boot test that we are running
> has a 2 minute timeout and the board is now failing to boot within that
> time.
>
> Adding some debug prints, I can see that initially the EMC clock
> frequency is 1600MHz and now after this change, on boot the EMC clock
> get set to 250MHz. Hence, the booting is now taking significantly longer
> and the test times out.
>
> We definitely don't want to increase the timeout of the test. Any thoughts?

My setup uses the boot stack from L4T r32.7.6, though cboot is source
built and has had changes over time to support newer Android versions.
There shouldn't be anything there that would affect emc clock, though.

I'm seeing the emc clock stay at the boot value, namely 1600MHz. Per
both debugfs clk/emc/clk_rate and bpmp/debug/clk/emc/rate. I don't
even see 250MHz as an option. Debugfs emc/available_rates lists 204MHz
as the closest entry.

I'm trying to think what could cause a drop in the selected clock
rate. This patch should only dynamically change the rate if the opp
tables exist, enabling the cpufreq based scaling via icc. But those
tables don't exist on linux-next right now. My test ramdisk does
nothing except set up sysfs/procfs/etc just enough to run a busybox
shell for debugging. Do the Nvidia regression testing boot scripts do
anything to sysfs or debugfs that would affect emc?

Aaron
On 11/11/2025 17:04, Aaron Kling wrote:

...

> My setup uses the boot stack from L4T r32.7.6, though cboot is source
> built and has had changes over time to support newer Android versions.
> There shouldn't be anything there that would affect emc clock, though.
>
> I'm seeing the emc clock stay at the boot value, namely 1600MHz. Per
> both debugfs clk/emc/clk_rate and bpmp/debug/clk/emc/rate. I don't
> even see 250MHz as an option. Debugfs emc/available_rates lists 204MHz
> as the closest entry.
>
> I'm trying to think what could cause a drop in the selected clock
> rate. This patch should only dynamically change the rate if the opp
> tables exist, enabling the cpufreq based scaling via icc. But those
> tables don't exist on linux-next right now. My test ramdisk does
> nothing except set up sysfs/procfs/etc just enough to run a busybox
> shell for debugging. Do the Nvidia regression testing boot scripts do
> anything to sysfs or debugfs that would affect emc?

So this is definitely coming from ICC. On boot I see a request for
250MHz coming from the PCIe driver ...

[ 13.861227] tegra186_emc_icc_set_bw-356: rate 250000000
[ 13.861350] CPU: 1 UID: 0 PID: 68 Comm: kworker/u32:1 Not tainted 6.18.0-rc4-next-20251110-00001-gfc12493c80fb-dirty #9 PREEMPT
[ 13.861362] Hardware name: NVIDIA Jetson AGX Xavier Developer Kit (DT)
[ 13.861370] Workqueue: events_unbound deferred_probe_work_func
[ 13.861388] Call trace:
[ 13.861393] show_stack+0x18/0x24 (C)
[ 13.861407] dump_stack_lvl+0x74/0x8c
[ 13.861419] dump_stack+0x18/0x24
[ 13.861426] tegra186_emc_icc_set_bw+0xc8/0x14c
[ 13.861438] apply_constraints+0x70/0xb0
[ 13.861451] icc_set_bw+0x88/0x128
[ 13.861461] tegra_pcie_icc_set+0x7c/0x10c [pcie_tegra194]
[ 13.861499] tegra_pcie_dw_start_link+0x178/0x2b0 [pcie_tegra194]
[ 13.861510] dw_pcie_host_init+0x664/0x6e0
[ 13.861523] tegra_pcie_dw_probe+0x6d4/0xbfc [pcie_tegra194]
[ 13.861534] platform_probe+0x5c/0x98
[ 13.861547] really_probe+0xbc/0x2a8
[ 13.861555] __driver_probe_device+0x78/0x12c
[ 13.861563] driver_probe_device+0x3c/0x15c
[ 13.861572] __device_attach_driver+0xb8/0x134
[ 13.861580] bus_for_each_drv+0x84/0xe0
[ 13.861588] __device_attach+0x9c/0x188
[ 13.861596] device_initial_probe+0x14/0x20
[ 13.861610] bus_probe_device+0xac/0xb0
[ 13.861619] deferred_probe_work_func+0x88/0xc0
[ 13.861627] process_one_work+0x148/0x28c
[ 13.861640] worker_thread+0x2d0/0x3d8
[ 13.861648] kthread+0x128/0x200
[ 13.861659] ret_from_fork+0x10/0x20

The actual rate that is set is 408MHz if I read the rate after
it is set ...

[ 13.912099] tegra186_emc_icc_set_bw-362: rate 408000000

This is a simple boot test and so nothing we are doing via
debugfs/sysfs to influence this.

Jon

--
nvpublic
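The 250 MHz floor and the 408 MHz result line up with the math in tegra186_emc_icc_set_bw() from the patch: the winning bandwidth vote is halved for DDR and clk_set_rate() then lands on an achievable EMC rate. A standalone sketch with assumed numbers (the ~500 MB/s peak vote is inferred from the printed 250 MHz, not read out of the PCIe driver):

#include <stdio.h>

int main(void)
{
	/* Assumed ICC vote, in bytes/s (inferred from the 250 MHz print
	 * in the trace, not taken from the PCIe driver itself).
	 */
	unsigned long long avg_bw = 0;
	unsigned long long peak_bw = 500000000ULL;
	unsigned long long rate = peak_bw > avg_bw ? peak_bw : avg_bw;

	rate /= 2;	/* DDR: data is sampled on both EMC clock edges */

	printf("requested EMC floor: %llu Hz\n", rate);	/* 250000000 */
	/* clk_set_rate() then lands on an achievable EMC table rate,
	 * which is 408 MHz in the trace above.
	 */
	return 0;
}
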
On Tue, Nov 11, 2025 at 3:29 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>
> On 11/11/2025 17:04, Aaron Kling wrote:
>
> ...
>
> So this is definitely coming from ICC. On boot I see a request for
> 250MHz coming from the PCIe driver ...

...

> The actual rate that is set is 408MHz if I read the rate after
> it is set ...
>
> [ 13.912099] tegra186_emc_icc_set_bw-362: rate 408000000
>
> This is a simple boot test and so nothing we are doing via
> debugfs/sysfs to influence this.

Alright, I think I've got the picture of what's going on now. The
standard arm64 defconfig enables the t194 pcie driver as a module. And
my simple busybox ramdisk that I use for mainline regression testing
isn't loading any modules. If I set the pcie driver to built-in, I
replicate the issue. And I don't see the issue on my normal use case,
because I have the dt changes as well.

So it appears that the pcie driver submits icc bandwidth. And without
cpufreq submitting bandwidth as well, the emc driver gets a very low
number and thus sets a very low emc freq. The question becomes... what
to do about it? If the related dt changes were submitted to
linux-next, everything should fall into place. And I'm not sure where
this falls on the severity scale since it doesn't full out break boot
or prevent operation.

Aaron
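For context, a consumer vote like the one PCIe makes boils down to something of this shape; the path name and the 500 MB/s figure are illustrative only, this is not the pcie-tegra194 code. Every such vote lands in tegra186_emc_icc_set_bw(), which now turns it into an EMC clock floor whenever bwmgr is unavailable:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/interconnect.h>

/* Sketch of an ICC consumer vote with made-up values. */
static int example_request_emc_bandwidth(struct device *dev)
{
	struct icc_path *path;

	path = devm_of_icc_get(dev, "write");
	if (IS_ERR(path))
		return PTR_ERR(path);

	/* avg_bw = 0, peak_bw = 500 MB/s */
	return icc_set_bw(path, 0, MBps_to_icc(500));
}
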
On 12/11/2025 00:17, Aaron Kling wrote:
>>
>> The actual rate that is set is 408MHz if I read the rate after
>> it is set ...
>>
>> [ 13.912099] tegra186_emc_icc_set_bw-362: rate 408000000
>>
>> This is a simple boot test and so nothing we are doing via
>> debugfs/sysfs to influence this.
>
> Alright, I think I've got the picture of what's going on now. The
> standard arm64 defconfig enables the t194 pcie driver as a module. And
> my simple busybox ramdisk that I use for mainline regression testing
> isn't loading any modules. If I set the pcie driver to built-in, I
> replicate the issue. And I don't see the issue on my normal use case,
> because I have the dt changes as well.
>
> So it appears that the pcie driver submits icc bandwidth. And without
> cpufreq submitting bandwidth as well, the emc driver gets a very low
> number and thus sets a very low emc freq. The question becomes... what

If this depends on DT changes then it is obvious ABI break. Nothing in
commit msgs explained ABI impact.

> to do about it? If the related dt changes were submitted to
> linux-next, everything should fall into place. And I'm not sure where
> this falls on the severity scale since it doesn't full out break boot
> or prevent operation.
>
> Aaron

Best regards,
Krzysztof
On 11/11/2025 23:17, Aaron Kling wrote:

...

> Alright, I think I've got the picture of what's going on now. The
> standard arm64 defconfig enables the t194 pcie driver as a module. And
> my simple busybox ramdisk that I use for mainline regression testing
> isn't loading any modules. If I set the pcie driver to built-in, I
> replicate the issue. And I don't see the issue on my normal use case,
> because I have the dt changes as well.
>
> So it appears that the pcie driver submits icc bandwidth. And without
> cpufreq submitting bandwidth as well, the emc driver gets a very low
> number and thus sets a very low emc freq. The question becomes... what
> to do about it? If the related dt changes were submitted to
> linux-next, everything should fall into place. And I'm not sure where
> this falls on the severity scale since it doesn't full out break boot
> or prevent operation.

Where are the related DT changes? If we can get these into -next and
lined up to be merged for v6.19, then that is fine. However, we should
not merge this for v6.19 without the DT changes.

I will also talk with Thierry to see if he has any concerns about users
seeing slow performance if they don't have an up-to-date DTB.

Is there any easy way to detect if the DTB has the necessary properties
to enable ICC scaling?

Jon

--
nvpublic
On 12/11/2025 07:18, Jon Hunter wrote:
>
> On 11/11/2025 23:17, Aaron Kling wrote:
>
> ...
>
> Where are the related DT changes? If we can get these into -next and
> lined up to be merged for v6.19, then that is fine. However, we should

It's still breaking all the users then.

> not merge this for v6.19 without the DT changes.
>
> I will also talk with Thierry to see if he has any concerns about users
> seeing slow performance if they don't have an up-to-date DTB.
>
> Is there any easy way to detect if the DTB has he necessary properties
> to enable ICC scaling?
>
> Jon

Best regards,
Krzysztof
On 12/11/2025 07:26, Krzysztof Kozlowski wrote:
> On 12/11/2025 07:18, Jon Hunter wrote:
>>
>> ...
>>
>> Where are the related DT changes? If we can get these into -next and
>> lined up to be merged for v6.19, then that is fine. However, we should
>
> It's still breaking all the users then.

Yes indeed.

Jon

--
nvpublic
On 12/11/2025 11:59, Jon Hunter wrote:
>
> On 12/11/2025 07:26, Krzysztof Kozlowski wrote:
>> On 12/11/2025 07:18, Jon Hunter wrote:
>>>
>>> ...
>>>
>>> Where are the related DT changes? If we can get these into -next and
>>> lined up to be merged for v6.19, then that is fine. However, we should
>>
>> It's still breaking all the users then.
>
> Yes indeed.

Please test if dropping sync_state from memory controller drivers helps
you. This might be the easiest fix and it is also a known solution when
there are no users.

Best regards,
Krzysztof
On 12/11/2025 11:42, Krzysztof Kozlowski wrote:
> On 12/11/2025 11:59, Jon Hunter wrote:
>>
>> ...
>>
>> Yes indeed.
>
> Please test if dropping sync_state from memory controller drivers helps
> you. This might be the easiest fix and it is also known solution when
> there are no users.

I had a quick look, but I believe that sync_state was first added for
Tegra234 devices. The current issue is with Tegra194, so I am not sure
we can simply drop it.

Jon

--
nvpublic
On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote:
>
> On 11/11/2025 23:17, Aaron Kling wrote:
>
> ...
>
> Where are the related DT changes? If we can get these into -next and
> lined up to be merged for v6.19, then that is fine. However, we should
> not merge this for v6.19 without the DT changes.

The dt changes are here [0]. This was all part of the same series,
keeping everything logically related together. But on v2, Krzysztof
said that none of this should have ever been together and that each
subsystem should get a separate series, even if the changes are
related. Which I did, and now this is split across three series. The
actmon series for tegra210 is in a similar state. Split across four
series and only one has been pulled to linux-next.

> I will also talk with Thierry to see if he has any concerns about users
> seeing slow performance if they don't have an up-to-date DTB.
>
> Is there any easy way to detect if the DTB has he necessary properties
> to enable ICC scaling?

I'm not sure there is any simple way, given how I set up tegra186 and
tegra194. The new dt properties are on the cpu nodes, there's nothing
new for the emc node. So the emc driver just unconditionally declares
itself to icc. It was doing this before too, but wouldn't do anything
on tegra186 or tegra194 because the set_bw function was just a stub and
the real logic happened in the bpmp bw mgr, which only exists on
tegra234+. Now the set_bw function will directly calculate and set the
emc clock as long as the bpmp bw mgr is not supported. Offhand, I can't
think of anything existing to check to skip this, because nothing new
in the dt has been added in the scope of emc.

Aaron

[0] https://lore.kernel.org/r/20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com
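Not a recommendation, but one untested way to approximate "does this DTB opt in to ICC scaling" without adding anything to the emc node would be to look for the CPU-side interconnect votes from the pending DT series and only honour non-bwmgr requests when they exist. Purely a sketch for discussion; the helper name is made up:

#include <linux/of.h>

/* Untested sketch: report whether any CPU node carries an
 * "interconnects" property, i.e. whether the DT has the CPU-side
 * votes from the pending DT series.
 */
static bool tegra186_emc_dt_has_icc_users(void)
{
	struct device_node *np;

	for_each_of_cpu_node(np) {
		if (of_property_present(np, "interconnects")) {
			of_node_put(np);
			return true;
		}
	}

	return false;
}
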
On 12/11/2025 07:21, Aaron Kling wrote: > On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: >> >> >> On 11/11/2025 23:17, Aaron Kling wrote: >> >> ... >> >>> Alright, I think I've got the picture of what's going on now. The >>> standard arm64 defconfig enables the t194 pcie driver as a module. And >>> my simple busybox ramdisk that I use for mainline regression testing >>> isn't loading any modules. If I set the pcie driver to built-in, I >>> replicate the issue. And I don't see the issue on my normal use case, >>> because I have the dt changes as well. >>> >>> So it appears that the pcie driver submits icc bandwidth. And without >>> cpufreq submitting bandwidth as well, the emc driver gets a very low >>> number and thus sets a very low emc freq. The question becomes... what >>> to do about it? If the related dt changes were submitted to >>> linux-next, everything should fall into place. And I'm not sure where >>> this falls on the severity scale since it doesn't full out break boot >>> or prevent operation. >> >> Where are the related DT changes? If we can get these into -next and >> lined up to be merged for v6.19, then that is fine. However, we should >> not merge this for v6.19 without the DT changes. > > The dt changes are here [0]. To confirm, applying the DT changes do not fix this for me. Thierry is having a look at this to see if there is a way to fix this. BTW, I have also noticed that Thierry's memory frequency test [0] is also failing on Tegra186. The test simply tries to set the frequency via the sysfs and this is now failing. I am seeing ... memory: emc: - available rates: (* = current) memory: emc: - 40800000 memory: emc: - 68000000 memory: emc: - 102000000 memory: emc: - 204000000 memory: emc: - 408000000 memory: emc: - 665600000 memory: emc: - 800000000 memory: emc: - 1062400000 memory: emc: - 1331200000 memory: emc: - 1600000000 memory: emc: - 1866000000 * memory: emc: - testing: memory: emc: - 40800000...OSError: [Errno 34] Numerical result out of range Jon [0] https://github.com/thierryreding/tegra-tests -- nvpublic
On 21/11/2025 12:21, Jon Hunter wrote: > > On 12/11/2025 07:21, Aaron Kling wrote: >> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: >>> >>> >>> On 11/11/2025 23:17, Aaron Kling wrote: >>> >>> ... >>> >>>> Alright, I think I've got the picture of what's going on now. The >>>> standard arm64 defconfig enables the t194 pcie driver as a module. And >>>> my simple busybox ramdisk that I use for mainline regression testing >>>> isn't loading any modules. If I set the pcie driver to built-in, I >>>> replicate the issue. And I don't see the issue on my normal use case, >>>> because I have the dt changes as well. >>>> >>>> So it appears that the pcie driver submits icc bandwidth. And without >>>> cpufreq submitting bandwidth as well, the emc driver gets a very low >>>> number and thus sets a very low emc freq. The question becomes... what >>>> to do about it? If the related dt changes were submitted to >>>> linux-next, everything should fall into place. And I'm not sure where >>>> this falls on the severity scale since it doesn't full out break boot >>>> or prevent operation. >>> >>> Where are the related DT changes? If we can get these into -next and >>> lined up to be merged for v6.19, then that is fine. However, we should >>> not merge this for v6.19 without the DT changes. >> >> The dt changes are here [0]. > > To confirm, applying the DT changes do not fix this for me. Thierry is > having a look at this to see if there is a way to fix this. > > BTW, I have also noticed that Thierry's memory frequency test [0] is > also failing on Tegra186. The test simply tries to set the frequency via > the sysfs and this is now failing. I am seeing .. The pull request was not yet merged, so I can amend it. The issue was reported 12 days ago, so if this cannot be fixed in that time, then it is not yet ready and I will drop the changes. Best regards, Krzysztof
On Sat, Nov 22, 2025 at 6:01 AM Krzysztof Kozlowski <krzk@kernel.org> wrote: > > On 21/11/2025 12:21, Jon Hunter wrote: > > > > On 12/11/2025 07:21, Aaron Kling wrote: > >> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >>> > >>> > >>> On 11/11/2025 23:17, Aaron Kling wrote: > >>> > >>> ... > >>> > >>>> Alright, I think I've got the picture of what's going on now. The > >>>> standard arm64 defconfig enables the t194 pcie driver as a module. And > >>>> my simple busybox ramdisk that I use for mainline regression testing > >>>> isn't loading any modules. If I set the pcie driver to built-in, I > >>>> replicate the issue. And I don't see the issue on my normal use case, > >>>> because I have the dt changes as well. > >>>> > >>>> So it appears that the pcie driver submits icc bandwidth. And without > >>>> cpufreq submitting bandwidth as well, the emc driver gets a very low > >>>> number and thus sets a very low emc freq. The question becomes... what > >>>> to do about it? If the related dt changes were submitted to > >>>> linux-next, everything should fall into place. And I'm not sure where > >>>> this falls on the severity scale since it doesn't full out break boot > >>>> or prevent operation. > >>> > >>> Where are the related DT changes? If we can get these into -next and > >>> lined up to be merged for v6.19, then that is fine. However, we should > >>> not merge this for v6.19 without the DT changes. > >> > >> The dt changes are here [0]. > > > > To confirm, applying the DT changes do not fix this for me. Thierry is > > having a look at this to see if there is a way to fix this. > > > > BTW, I have also noticed that Thierry's memory frequency test [0] is > > also failing on Tegra186. The test simply tries to set the frequency via > > the sysfs and this is now failing. I am seeing .. With this patch dropped from -next, what needs to happen to get it requeued? I gave an analysis over two weeks ago and have seen no response since. Aaron
On 09/12/2025 05:26, Aaron Kling wrote: > On Sat, Nov 22, 2025 at 6:01 AM Krzysztof Kozlowski <krzk@kernel.org> wrote: >> >> On 21/11/2025 12:21, Jon Hunter wrote: >>> >>> On 12/11/2025 07:21, Aaron Kling wrote: >>>> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: >>>>> >>>>> >>>>> On 11/11/2025 23:17, Aaron Kling wrote: >>>>> >>>>> ... >>>>> >>>>>> Alright, I think I've got the picture of what's going on now. The >>>>>> standard arm64 defconfig enables the t194 pcie driver as a module. And >>>>>> my simple busybox ramdisk that I use for mainline regression testing >>>>>> isn't loading any modules. If I set the pcie driver to built-in, I >>>>>> replicate the issue. And I don't see the issue on my normal use case, >>>>>> because I have the dt changes as well. >>>>>> >>>>>> So it appears that the pcie driver submits icc bandwidth. And without >>>>>> cpufreq submitting bandwidth as well, the emc driver gets a very low >>>>>> number and thus sets a very low emc freq. The question becomes... what >>>>>> to do about it? If the related dt changes were submitted to >>>>>> linux-next, everything should fall into place. And I'm not sure where >>>>>> this falls on the severity scale since it doesn't full out break boot >>>>>> or prevent operation. >>>>> >>>>> Where are the related DT changes? If we can get these into -next and >>>>> lined up to be merged for v6.19, then that is fine. However, we should >>>>> not merge this for v6.19 without the DT changes. >>>> >>>> The dt changes are here [0]. >>> >>> To confirm, applying the DT changes do not fix this for me. Thierry is >>> having a look at this to see if there is a way to fix this. >>> >>> BTW, I have also noticed that Thierry's memory frequency test [0] is >>> also failing on Tegra186. The test simply tries to set the frequency via >>> the sysfs and this is now failing. I am seeing .. > > With this patch dropped from -next, what needs to happen to get it > requeued? I gave an analysis over two weeks ago and have seen no > response since. Hm, I did not see the root cause identified, so maybe I missed something. Anyway, I am waiting for the patchset to be retested and resent. And testing MUST include kernel development process rules, including how patches are taken - see maintainer soc profile. Any dependencies must be clearly marked. Best regards, Krzysztof
On 09/12/2025 05:53, Krzysztof Kozlowski wrote: > On 09/12/2025 05:26, Aaron Kling wrote: >> On Sat, Nov 22, 2025 at 6:01 AM Krzysztof Kozlowski <krzk@kernel.org> wrote: >>> >>> On 21/11/2025 12:21, Jon Hunter wrote: >>>> >>>> On 12/11/2025 07:21, Aaron Kling wrote: >>>>> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: >>>>>> >>>>>> >>>>>> On 11/11/2025 23:17, Aaron Kling wrote: >>>>>> >>>>>> ... >>>>>> >>>>>>> Alright, I think I've got the picture of what's going on now. The >>>>>>> standard arm64 defconfig enables the t194 pcie driver as a module. And >>>>>>> my simple busybox ramdisk that I use for mainline regression testing >>>>>>> isn't loading any modules. If I set the pcie driver to built-in, I >>>>>>> replicate the issue. And I don't see the issue on my normal use case, >>>>>>> because I have the dt changes as well. >>>>>>> >>>>>>> So it appears that the pcie driver submits icc bandwidth. And without >>>>>>> cpufreq submitting bandwidth as well, the emc driver gets a very low >>>>>>> number and thus sets a very low emc freq. The question becomes... what >>>>>>> to do about it? If the related dt changes were submitted to >>>>>>> linux-next, everything should fall into place. And I'm not sure where >>>>>>> this falls on the severity scale since it doesn't full out break boot >>>>>>> or prevent operation. >>>>>> >>>>>> Where are the related DT changes? If we can get these into -next and >>>>>> lined up to be merged for v6.19, then that is fine. However, we should >>>>>> not merge this for v6.19 without the DT changes. >>>>> >>>>> The dt changes are here [0]. >>>> >>>> To confirm, applying the DT changes do not fix this for me. Thierry is >>>> having a look at this to see if there is a way to fix this. >>>> >>>> BTW, I have also noticed that Thierry's memory frequency test [0] is >>>> also failing on Tegra186. The test simply tries to set the frequency via >>>> the sysfs and this is now failing. I am seeing .. >> >> With this patch dropped from -next, what needs to happen to get it >> requeued? I gave an analysis over two weeks ago and have seen no >> response since. > > Hm, I did not see the root cause identified, so maybe I missed something. > > Anyway, I am waiting for the patchset to be retested and resent. And > testing MUST include kernel development process rules, including how > patches are taken - see maintainer soc profile. Any dependencies must be > clearly marked. Yes me too. I am happy to re-test any updates. Jon -- nvpublic
On Fri, Nov 21, 2025 at 5:21 AM Jon Hunter <jonathanh@nvidia.com> wrote: > > > On 12/11/2025 07:21, Aaron Kling wrote: > > On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >> > >> > >> On 11/11/2025 23:17, Aaron Kling wrote: > >> > >> ... > >> > >>> Alright, I think I've got the picture of what's going on now. The > >>> standard arm64 defconfig enables the t194 pcie driver as a module. And > >>> my simple busybox ramdisk that I use for mainline regression testing > >>> isn't loading any modules. If I set the pcie driver to built-in, I > >>> replicate the issue. And I don't see the issue on my normal use case, > >>> because I have the dt changes as well. > >>> > >>> So it appears that the pcie driver submits icc bandwidth. And without > >>> cpufreq submitting bandwidth as well, the emc driver gets a very low > >>> number and thus sets a very low emc freq. The question becomes... what > >>> to do about it? If the related dt changes were submitted to > >>> linux-next, everything should fall into place. And I'm not sure where > >>> this falls on the severity scale since it doesn't full out break boot > >>> or prevent operation. > >> > >> Where are the related DT changes? If we can get these into -next and > >> lined up to be merged for v6.19, then that is fine. However, we should > >> not merge this for v6.19 without the DT changes. > > > > The dt changes are here [0]. > > To confirm, applying the DT changes do not fix this for me. Thierry is > having a look at this to see if there is a way to fix this. > > BTW, I have also noticed that Thierry's memory frequency test [0] is > also failing on Tegra186. The test simply tries to set the frequency via > the sysfs and this is now failing. I am seeing ... > > memory: emc: - available rates: (* = current) > memory: emc: - 40800000 > memory: emc: - 68000000 > memory: emc: - 102000000 > memory: emc: - 204000000 > memory: emc: - 408000000 > memory: emc: - 665600000 > memory: emc: - 800000000 > memory: emc: - 1062400000 > memory: emc: - 1331200000 > memory: emc: - 1600000000 > memory: emc: - 1866000000 * > memory: emc: - testing: > memory: emc: - 40800000...OSError: [Errno 34] Numerical result out > of range Question. Does this test run and pass on jetson-tk1? I based the tegra210 and tegra186 [0] code on tegra124 [1]. And I don't see a difference in the flow now. What appears to be happening is that icc is reporting a high bandwidth, setting the emc min_freq to something like 1600MHz. Then debugfs is having max_freq set to something low like 40.8MHz. Then the linked code block fails because the higher of the min_freqs is greater than the lower of the max_freqs. But if this same test is run on jetson-tk1, I don't see how it passes. Unless maybe the t124 actmon is consistently setting min freqs during the tests. An argument could be made that any attempt to set debugfs should win a conflict with icc. That could be done. But if that needs done here, I'd argue that it needs replicated across all other applicable emc drivers too. Aaron [0] https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/memory/tegra/tegra186-emc.c?h=next-20251121#n78 [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/memory/tegra/tegra124-emc.c?h=v6.18-rc6#n1066
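For discussion, a hypothetical sketch (not the posted patch and not any existing driver code) of what the "debugfs wins" policy suggested above could look like: when the user lowers the debugfs max rate below the floor currently demanded through the interconnect framework, clamp that floor down instead of failing with -ERANGE. All names below are made up for illustration.

/* Hypothetical illustration of a debugfs request overriding the ICC floor. */
static unsigned long icc_floor_rate;   /* minimum rate currently demanded via ICC */
static unsigned long debug_max_rate;   /* cap requested through debugfs */

static int set_debug_max_rate(unsigned long rate)
{
        debug_max_rate = rate;

        /* Let the explicit user request win over the ICC-derived floor. */
        if (icc_floor_rate > debug_max_rate)
                icc_floor_rate = debug_max_rate;

        /* A real driver would now recompute and program the EMC clock. */
        return 0;
}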
On 21/11/2025 18:17, Aaron Kling wrote: > On Fri, Nov 21, 2025 at 5:21 AM Jon Hunter <jonathanh@nvidia.com> wrote: >> >> >> On 12/11/2025 07:21, Aaron Kling wrote: >>> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: >>>> >>>> >>>> On 11/11/2025 23:17, Aaron Kling wrote: >>>> >>>> ... >>>> >>>>> Alright, I think I've got the picture of what's going on now. The >>>>> standard arm64 defconfig enables the t194 pcie driver as a module. And >>>>> my simple busybox ramdisk that I use for mainline regression testing >>>>> isn't loading any modules. If I set the pcie driver to built-in, I >>>>> replicate the issue. And I don't see the issue on my normal use case, >>>>> because I have the dt changes as well. >>>>> >>>>> So it appears that the pcie driver submits icc bandwidth. And without >>>>> cpufreq submitting bandwidth as well, the emc driver gets a very low >>>>> number and thus sets a very low emc freq. The question becomes... what >>>>> to do about it? If the related dt changes were submitted to >>>>> linux-next, everything should fall into place. And I'm not sure where >>>>> this falls on the severity scale since it doesn't full out break boot >>>>> or prevent operation. >>>> >>>> Where are the related DT changes? If we can get these into -next and >>>> lined up to be merged for v6.19, then that is fine. However, we should >>>> not merge this for v6.19 without the DT changes. >>> >>> The dt changes are here [0]. >> >> To confirm, applying the DT changes do not fix this for me. Thierry is >> having a look at this to see if there is a way to fix this. >> >> BTW, I have also noticed that Thierry's memory frequency test [0] is >> also failing on Tegra186. The test simply tries to set the frequency via >> the sysfs and this is now failing. I am seeing ... >> >> memory: emc: - available rates: (* = current) >> memory: emc: - 40800000 >> memory: emc: - 68000000 >> memory: emc: - 102000000 >> memory: emc: - 204000000 >> memory: emc: - 408000000 >> memory: emc: - 665600000 >> memory: emc: - 800000000 >> memory: emc: - 1062400000 >> memory: emc: - 1331200000 >> memory: emc: - 1600000000 >> memory: emc: - 1866000000 * >> memory: emc: - testing: >> memory: emc: - 40800000...OSError: [Errno 34] Numerical result out >> of range > > Question. Does this test run and pass on jetson-tk1? I based the > tegra210 and tegra186 [0] code on tegra124 [1]. And I don't see a > difference in the flow now. What appears to be happening is that icc > is reporting a high bandwidth, setting the emc min_freq to something > like 1600MHz. Then debugfs is having max_freq set to something low > like 40.8MHz. Then the linked code block fails because the higher of > the min_freqs is greater than the lower of the max_freqs. But if this > same test is run on jetson-tk1, I don't see how it passes. Unless > maybe the t124 actmon is consistently setting min freqs during the > tests. So we don't currently run this test on Tegra124. We could certainly try. I don't recall if there was an issue that prevented us from doing so now. > An argument could be made that any attempt to set debugfs should win a > conflict with icc. That could be done. But if that needs done here, > I'd argue that it needs replicated across all other applicable emc > drivers too. The bottom line is that we cannot regress anything that was working before. Jon -- nvpublic
On Tue, Dec 9, 2025 at 10:08 PM Jon Hunter <jonathanh@nvidia.com> wrote: > > > On 21/11/2025 18:17, Aaron Kling wrote: > > On Fri, Nov 21, 2025 at 5:21 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >> > >> > >> On 12/11/2025 07:21, Aaron Kling wrote: > >>> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >>>> > >>>> > >>>> On 11/11/2025 23:17, Aaron Kling wrote: > >>>> > >>>> ... > >>>> > >>>>> Alright, I think I've got the picture of what's going on now. The > >>>>> standard arm64 defconfig enables the t194 pcie driver as a module. And > >>>>> my simple busybox ramdisk that I use for mainline regression testing > >>>>> isn't loading any modules. If I set the pcie driver to built-in, I > >>>>> replicate the issue. And I don't see the issue on my normal use case, > >>>>> because I have the dt changes as well. > >>>>> > >>>>> So it appears that the pcie driver submits icc bandwidth. And without > >>>>> cpufreq submitting bandwidth as well, the emc driver gets a very low > >>>>> number and thus sets a very low emc freq. The question becomes... what > >>>>> to do about it? If the related dt changes were submitted to > >>>>> linux-next, everything should fall into place. And I'm not sure where > >>>>> this falls on the severity scale since it doesn't full out break boot > >>>>> or prevent operation. > >>>> > >>>> Where are the related DT changes? If we can get these into -next and > >>>> lined up to be merged for v6.19, then that is fine. However, we should > >>>> not merge this for v6.19 without the DT changes. > >>> > >>> The dt changes are here [0]. > >> > >> To confirm, applying the DT changes do not fix this for me. Thierry is > >> having a look at this to see if there is a way to fix this. > >> > >> BTW, I have also noticed that Thierry's memory frequency test [0] is > >> also failing on Tegra186. The test simply tries to set the frequency via > >> the sysfs and this is now failing. I am seeing ... > >> > >> memory: emc: - available rates: (* = current) > >> memory: emc: - 40800000 > >> memory: emc: - 68000000 > >> memory: emc: - 102000000 > >> memory: emc: - 204000000 > >> memory: emc: - 408000000 > >> memory: emc: - 665600000 > >> memory: emc: - 800000000 > >> memory: emc: - 1062400000 > >> memory: emc: - 1331200000 > >> memory: emc: - 1600000000 > >> memory: emc: - 1866000000 * > >> memory: emc: - testing: > >> memory: emc: - 40800000...OSError: [Errno 34] Numerical result out > >> of range > > > > Question. Does this test run and pass on jetson-tk1? I based the > > tegra210 and tegra186 [0] code on tegra124 [1]. And I don't see a > > difference in the flow now. What appears to be happening is that icc > > is reporting a high bandwidth, setting the emc min_freq to something > > like 1600MHz. Then debugfs is having max_freq set to something low > > like 40.8MHz. Then the linked code block fails because the higher of > > the min_freqs is greater than the lower of the max_freqs. But if this > > same test is run on jetson-tk1, I don't see how it passes. Unless > > maybe the t124 actmon is consistently setting min freqs during the > > tests. > > So we don't currently run this test on Tegra124. We could certainly try. > I don't recall if there was an issue that prevented us from doing so now. > > > An argument could be made that any attempt to set debugfs should win a > > conflict with icc. That could be done. But if that needs done here, > > I'd argue that it needs replicated across all other applicable emc > > drivers too. 
> > The bottom line is that we cannot regress anything that was working before. Let me try to iterate the potential issues I've seen stated here. If I'm missing anything, please fill in the blanks. 1) If this change is applied without the related dt change and the pcie driver is loaded, the emc clock can become stuck at the lowest rate. This is caused by the pcie driver providing icc data, but nothing else is. So the very low requested bandwidth results in the emc clock being set very low. I'm not sure there is a 'fix' for this, beyond making sure the dt change is merged to ensure that the cpufreq driver provides bandwidth info, causing the emc driver to select a more reasonable emc clock rate. This is a similar situation to what's currently blocking the tegra210 actmon series. I don't think there is a way for the drivers to know if icc data is missing/wrong. The scaling is doing exactly what it's told based on the icc routing given in the dt. 2) Jon, you report that even with both this change and the related dt change, the issue is still not fixed. But you then posted a log showing that the emc rate is set to max. If the issue is that the emc rate is too low, then how can debugfs report that the rate is max? For reference, everything scales as expected for me given this change plus the dt change on both p2771 and p3636+p3509. 3) If icc is requesting enough bandwidth to set the emc clock to a high value, and a user then tries to set debugfs max_freq to a lower value, this code will reject the change. I do not believe this is an issue unique to this code. tegra20-emc, tegra30-emc, and tegra124-emc all have this same flow. And so does my proposed change to tegra210-emc-core in the actmon series. This is why I asked if tegra124 ran this test, to see if the failure was unique. If this is not a unique failure, then I'd argue that all instances need to be changed, not just this one, which would cause diverging results depending on the soc being used. A lot of the work I'm doing is to try to bring unity and feature parity to all the tegra socs I'm working on. I don't want to cause even more divergence. What actions need to be taken for which issue? Aaron
On 10/12/2025 05:06, Aaron Kling wrote: ... > Let me try to iterate the potential issues I've seen stated here. If > I'm missing anything, please fill in the blanks. > > 1) If this change is applied without the related dt change and the > pcie drvier is loaded, the emc clock can become stuck at the lowest > rate. This is caused by the pcie driver providing icc data, but > nothing else is. So the very low requested bandwidth results in the > emc clock being set very low. I'm not sure there is a 'fix' for this, > beyond making sure the dt change is merged to ensure that the cpufreq > driver provides bandwidth info, causing the emc driver to select a > more reasonable emc clock rate. This is a similar situation to what's > currently blocking the tegra210 actmon series. I don't think there is > a way for the drivers to know if icc data is missing/wrong. The > scaling is doing exactly what it's told based on the icc routing given > in the dt. So this is the fundamental issue with this that must be fixed. We can't allow the PCIe driver to slow the system down. I think that Krzysztof suggested we need some way to determine if the necessary ICC clients are present/registered for ICC to work. Admittedly, I have no idea if there is a simple way to do this, but we need something like that. > 2) Jon, you report that even with both this change and the related dt > change, that the issue is still not fixed. But then posted a log > showing that the emc rate is set to max. If the issue is that emc rate > is too low, then how can debugfs report that the rate is max? For > reference, everything scales as expected for me given this change plus > the dt change on both p2771 and p3636+p3509. To clarify, this broke the boot test on Tegra194 because the boot was too slow. However, this also broke the EMC test on Tegra186 because setting the frequency from the debugfs failed. So two different failures on two different devices. I am guessing the EMC test would also fail on Tegra194, but given that it does not boot, we did not get that far. > 3) If icc is requesting enough bandwidth to set the emc clock to a > high value, then a user tries to set debugfs max_freq to a lower > value, this code will reject the change. I do not believe this is an > issue unique to this code. tegra20-emc, tegra30-emc, and tegra124-emc > all have this same flow. And so does my proposed change to > tegra210-emc-core in the actmon series. This is why I asked if > tegra124 ran this test, to see if the failure was unique. If this is > not a unique failure, then I'd argue that all instances need changed, > not just this one causing diverging results depending on the soc being > utilized. A lot of the work I'm doing is to try to bring unity and > feature parity to all the tegra socs I'm working on. I don't want to > cause even more divergence. Yes that is fair point, however, we need to detect this in the tegra-tests so that we know that this will not work. It would be nice if we could disable ICC from userspace and then run the test. Bottom line here is that #1 is the problem that needs to be fixed. Jon -- nvpublic
On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote: > > > On 10/12/2025 05:06, Aaron Kling wrote: > > ... > > > Let me try to iterate the potential issues I've seen stated here. If > > I'm missing anything, please fill in the blanks. > > > > 1) If this change is applied without the related dt change and the > > pcie drvier is loaded, the emc clock can become stuck at the lowest > > rate. This is caused by the pcie driver providing icc data, but > > nothing else is. So the very low requested bandwidth results in the > > emc clock being set very low. I'm not sure there is a 'fix' for this, > > beyond making sure the dt change is merged to ensure that the cpufreq > > driver provides bandwidth info, causing the emc driver to select a > > more reasonable emc clock rate. This is a similar situation to what's > > currently blocking the tegra210 actmon series. I don't think there is > > a way for the drivers to know if icc data is missing/wrong. The > > scaling is doing exactly what it's told based on the icc routing given > > in the dt. > > So this is the fundamental issue with this that must be fixed. We can't > allow the PCIe driver to slow the system down. I think that Krzysztof > suggested we need some way to determine if the necessary ICC clients are > present/registered for ICC to work. Admittedly, I have no idea if there > is a simple way to do this, but we need something like that. I'm not sure I understand how checking clients would work. Is there a mechanism for the emc driver to know if cpufreq is registered to icc in a way that works with probe deferrals, but also allows for it to be optional? Alternatively if there is not, can we just accept the abi break and have this and the dt change depend on each other? I know it's not desirable or the first choice, but if the other option is to rewrite part of the icc system, then perhaps it should be an option. > > 2) Jon, you report that even with both this change and the related dt > > change, that the issue is still not fixed. But then posted a log > > showing that the emc rate is set to max. If the issue is that emc rate > > is too low, then how can debugfs report that the rate is max? For > > reference, everything scales as expected for me given this change plus > > the dt change on both p2771 and p3636+p3509. > > To clarify, this broke the boot test on Tegra194 because the boot was > too slow. However, this also broke the EMC test on Tegra186 because > setting the frequency from the debugfs failed. So two different failures > on two different devices. I am guessing the EMC test would also fail on > Tegra194, but given that it does not boot, we did not get that far. So you're saying that even with the dt changes, this change on tegra194 still does not boot before the regression test framework times out? If so, I need some more details about this. I have not seen issues on p2972 or p3518. For example, if I boot to android recovery where I set the cpufreq governor to performance, I see emc clock rate set to 2133 MHz and 1600 MHz respectively. And boot time from kernel start to pixels on display is 15 seconds, give or take a couple seconds. This is using the boot stack from l4t r32.7.6. > > 3) If icc is requesting enough bandwidth to set the emc clock to a > > high value, then a user tries to set debugfs max_freq to a lower > > value, this code will reject the change. I do not believe this is an > > issue unique to this code. tegra20-emc, tegra30-emc, and tegra124-emc > > all have this same flow. 
And so does my proposed change to > > tegra210-emc-core in the actmon series. This is why I asked if > > tegra124 ran this test, to see if the failure was unique. If this is > > not a unique failure, then I'd argue that all instances need changed, > > not just this one causing diverging results depending on the soc being > > utilized. A lot of the work I'm doing is to try to bring unity and > > feature parity to all the tegra socs I'm working on. I don't want to > > cause even more divergence. > > Yes that is fair point, however, we need to detect this in the > tegra-tests so that we know that this will not work. It would be nice if > we could disable ICC from userspace and then run the test. I am unaware of a way to disable icc from userspace. That would be useful to me as well. And for the record, I'm not refusing to make such a change. I would just want a series changing all the others to be uploaded and merged concurrently. But I cannot test t20 or t30. Only t124+. > Bottom line here is that #1 is the problem that needs to be fixed. Aaron
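On the question raised above of whether the emc driver can tell that the expected ICC clients exist, one heuristic that might be worth discussing, given that the new dt only adds interconnect paths to the cpu nodes, is to scan /cpus at probe time and only honour ICC-derived rates when at least one cpu node carries an "interconnects" property. This is only a sketch under that assumption; it says nothing about probe ordering or other clients.

#include <linux/of.h>

/* Return true if any cpu node declares an interconnect path (new DT). */
static bool cpus_have_icc_paths(void)
{
        struct device_node *cpus, *cpu;
        bool found = false;

        cpus = of_find_node_by_path("/cpus");
        if (!cpus)
                return false;

        for_each_child_of_node(cpus, cpu) {
                if (of_find_property(cpu, "interconnects", NULL)) {
                        found = true;
                        of_node_put(cpu);
                        break;
                }
        }

        of_node_put(cpus);
        return found;
}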
On 10/12/2025 18:32, Aaron Kling wrote: > On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote: >> >> >> On 10/12/2025 05:06, Aaron Kling wrote: >> >> ... >> >>> Let me try to iterate the potential issues I've seen stated here. If >>> I'm missing anything, please fill in the blanks. >>> >>> 1) If this change is applied without the related dt change and the >>> pcie drvier is loaded, the emc clock can become stuck at the lowest >>> rate. This is caused by the pcie driver providing icc data, but >>> nothing else is. So the very low requested bandwidth results in the >>> emc clock being set very low. I'm not sure there is a 'fix' for this, >>> beyond making sure the dt change is merged to ensure that the cpufreq >>> driver provides bandwidth info, causing the emc driver to select a >>> more reasonable emc clock rate. This is a similar situation to what's >>> currently blocking the tegra210 actmon series. I don't think there is >>> a way for the drivers to know if icc data is missing/wrong. The >>> scaling is doing exactly what it's told based on the icc routing given >>> in the dt. >> >> So this is the fundamental issue with this that must be fixed. We can't >> allow the PCIe driver to slow the system down. I think that Krzysztof >> suggested we need some way to determine if the necessary ICC clients are >> present/registered for ICC to work. Admittedly, I have no idea if there >> is a simple way to do this, but we need something like that. > > I'm not sure I understand how checking clients would work. Is there a > mechanism for the emc driver to know if cpufreq is registered to icc > in a way that works with probe deferrals, but also allows for it to be > optional? I am not sure if such a mechanism exists either, but it seems that we need something like this. > Alternatively if there is not, can we just accept the abi break and > have this and the dt change depend on each other? I know it's not > desirable or the first choice, but if the other option is to rewrite > part of the icc system, then perhaps it should be an option. I am not sure it is an ABI break, but the default performance might be worse. I am not sure if you are proposing a way to enforce the dependency or just saying that there is a dependency. We can't do the latter, but if there is a way for the kernel to check the dependency and make the right choice, then that should work. >>> 2) Jon, you report that even with both this change and the related dt >>> change, that the issue is still not fixed. But then posted a log >>> showing that the emc rate is set to max. If the issue is that emc rate >>> is too low, then how can debugfs report that the rate is max? For >>> reference, everything scales as expected for me given this change plus >>> the dt change on both p2771 and p3636+p3509. >> >> To clarify, this broke the boot test on Tegra194 because the boot was >> too slow. However, this also broke the EMC test on Tegra186 because >> setting the frequency from the debugfs failed. So two different failures >> on two different devices. I am guessing the EMC test would also fail on >> Tegra194, but given that it does not boot, we did not get that far. > > So you're saying that even with the dt changes, this change on > tegra194 still does not boot before the regression test framework > times out? If so, I need some more details about this. I have not seen > issues on p2972 or p3518. 
For example, if I boot to android recovery > where I set the cpufreq governor to performance, I see emc clock rate > set to 2133 MHz and 1600 MHz respectively. And boot time from kernel > start to pixels on display is 15 seconds, give or take a couple > seconds. This is using the boot stack from l4t r32.7.6. Yes. The boot failure here is not a hard boot failure, but the device takes too long to boot and the boot test times out. And no we will not increase the timeout as it is there for a reason. It could well be because the default governor is not set to performance. If you boot with just using the stock 'defconfig' for ARM64 without setting the governor does it take longer? Jon -- nvpublic
On Wed, Dec 10, 2025 at 3:24 PM Jon Hunter <jonathanh@nvidia.com> wrote: > > > On 10/12/2025 18:32, Aaron Kling wrote: > > On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >> > >> > >> On 10/12/2025 05:06, Aaron Kling wrote: > >> > >> ... > >> > >>> Let me try to iterate the potential issues I've seen stated here. If > >>> I'm missing anything, please fill in the blanks. > >>> > >>> 1) If this change is applied without the related dt change and the > >>> pcie drvier is loaded, the emc clock can become stuck at the lowest > >>> rate. This is caused by the pcie driver providing icc data, but > >>> nothing else is. So the very low requested bandwidth results in the > >>> emc clock being set very low. I'm not sure there is a 'fix' for this, > >>> beyond making sure the dt change is merged to ensure that the cpufreq > >>> driver provides bandwidth info, causing the emc driver to select a > >>> more reasonable emc clock rate. This is a similar situation to what's > >>> currently blocking the tegra210 actmon series. I don't think there is > >>> a way for the drivers to know if icc data is missing/wrong. The > >>> scaling is doing exactly what it's told based on the icc routing given > >>> in the dt. > >> > >> So this is the fundamental issue with this that must be fixed. We can't > >> allow the PCIe driver to slow the system down. I think that Krzysztof > >> suggested we need some way to determine if the necessary ICC clients are > >> present/registered for ICC to work. Admittedly, I have no idea if there > >> is a simple way to do this, but we need something like that. > > > > I'm not sure I understand how checking clients would work. Is there a > > mechanism for the emc driver to know if cpufreq is registered to icc > > in a way that works with probe deferrals, but also allows for it to be > > optional? > > I am not sure if such a mechanism exists either, but it seems that we > need something like this. > > > Alternatively if there is not, can we just accept the abi break and > > have this and the dt change depend on each other? I know it's not > > desirable or the first choice, but if the other option is to rewrite > > part of the icc system, then perhaps it should be an option. > > I am not sure it is an ABI break, but the default performance might be > worse. I am not sure if you are proposing a way to enforce the > dependency or just saying that there is a dependency. We can't do the > latter, but if there is a way for the kernel to check the dependency and > make the right choice, then that should work. So we can't accept that older dt's will run slower on a newer kernel and say that a newer dt is needed for full performance? If that's not an option, then I have no idea how to resolve this. I'm not greatly knowledgeable about the icc subsystem. I can try to look into options, but I'm not greatly optimistic about me finding one. If someone could suggest a concept on how to make it work, I could implement it. But I'm not even seeing the concept right now. > >>> 2) Jon, you report that even with both this change and the related dt > >>> change, that the issue is still not fixed. But then posted a log > >>> showing that the emc rate is set to max. If the issue is that emc rate > >>> is too low, then how can debugfs report that the rate is max? For > >>> reference, everything scales as expected for me given this change plus > >>> the dt change on both p2771 and p3636+p3509. > >> > >> To clarify, this broke the boot test on Tegra194 because the boot was > >> too slow. 
However, this also broke the EMC test on Tegra186 because > >> setting the frequency from the debugfs failed. So two different failures > >> on two different devices. I am guessing the EMC test would also fail on > >> Tegra194, but given that it does not boot, we did not get that far. > > > > So you're saying that even with the dt changes, this change on > > tegra194 still does not boot before the regression test framework > > times out? If so, I need some more details about this. I have not seen > > issues on p2972 or p3518. For example, if I boot to android recovery > > where I set the cpufreq governor to performance, I see emc clock rate > > set to 2133 MHz and 1600 MHz respectively. And boot time from kernel > > start to pixels on display is 15 seconds, give or take a couple > > seconds. This is using the boot stack from l4t r32.7.6. > > Yes. The boot failure here is not a hard boot failure, but the device > takes too long to boot and the boot test times out. And no we will not > increase the timeout as it is there for a reason. It could well be > because the default governor is not set to performance. If you boot with > just using the stock 'defconfig' for ARM64 without setting the governor > does it take longer? So, I checked out next-20251210, then b4 shazam'ed this series and the matching dt series, 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. Then built with LLVM=1 ARCH=arm64 make defconfig LLVM=1 ARCH=arm64 make -j33 Image nvidia/tegra194-p2972-0000.dtb I packaged them into an android boot image using a lightly modified copy of Gnurou's bbinitramfs which just drops to a busybox shell. Note that this includes no modules, and since the pcie driver is =m in defconfig, it is not included. Then I flashed that with the l4t r32.7.6 boot stack to p2972. I got the shell on uart after 4.275 seconds in the kernel. Per sysfs, the cpufreq governor is schedutil and all policies are idling at min freq, 115200. And per debugfs, the emc clock is 800000000. All this looks to be as expected. I have no idea why the regression test setup is timing out. I have not seen the issue through any of my testing. On pure mainline as per the above paragraph, or with the patches on the android common kernel, as per my target use case. I don't know what to do if I can't replicate the issue. I don't suppose the flash package for the regression test setup is something that could be released? Aaron
On 10/12/2025 22:41, Aaron Kling wrote: > On Wed, Dec 10, 2025 at 3:24 PM Jon Hunter <jonathanh@nvidia.com> wrote: >> >> >> On 10/12/2025 18:32, Aaron Kling wrote: >>> On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote: >>>> >>>> >>>> On 10/12/2025 05:06, Aaron Kling wrote: >>>> >>>> ... >>>> >>>>> Let me try to iterate the potential issues I've seen stated here. If >>>>> I'm missing anything, please fill in the blanks. >>>>> >>>>> 1) If this change is applied without the related dt change and the >>>>> pcie drvier is loaded, the emc clock can become stuck at the lowest >>>>> rate. This is caused by the pcie driver providing icc data, but >>>>> nothing else is. So the very low requested bandwidth results in the >>>>> emc clock being set very low. I'm not sure there is a 'fix' for this, >>>>> beyond making sure the dt change is merged to ensure that the cpufreq >>>>> driver provides bandwidth info, causing the emc driver to select a >>>>> more reasonable emc clock rate. This is a similar situation to what's >>>>> currently blocking the tegra210 actmon series. I don't think there is >>>>> a way for the drivers to know if icc data is missing/wrong. The >>>>> scaling is doing exactly what it's told based on the icc routing given >>>>> in the dt. >>>> >>>> So this is the fundamental issue with this that must be fixed. We can't >>>> allow the PCIe driver to slow the system down. I think that Krzysztof >>>> suggested we need some way to determine if the necessary ICC clients are >>>> present/registered for ICC to work. Admittedly, I have no idea if there >>>> is a simple way to do this, but we need something like that. >>> >>> I'm not sure I understand how checking clients would work. Is there a >>> mechanism for the emc driver to know if cpufreq is registered to icc >>> in a way that works with probe deferrals, but also allows for it to be >>> optional? >> >> I am not sure if such a mechanism exists either, but it seems that we >> need something like this. >> >>> Alternatively if there is not, can we just accept the abi break and >>> have this and the dt change depend on each other? I know it's not >>> desirable or the first choice, but if the other option is to rewrite >>> part of the icc system, then perhaps it should be an option. >> >> I am not sure it is an ABI break, but the default performance might be >> worse. I am not sure if you are proposing a way to enforce the >> dependency or just saying that there is a dependency. We can't do the >> latter, but if there is a way for the kernel to check the dependency and >> make the right choice, then that should work. > > So we can't accept that older dt's will run slower on a newer kernel > and say that a newer dt is needed for full performance? > > If that's not an option, then I have no idea how to resolve this. I'm > not greatly knowledgeable about the icc subsystem. I can try to look > into options, but I'm not greatly optimistic about me finding one. If > someone could suggest a concept on how to make it work, I could > implement it. But I'm not even seeing the concept right now. > >>>>> 2) Jon, you report that even with both this change and the related dt >>>>> change, that the issue is still not fixed. But then posted a log >>>>> showing that the emc rate is set to max. If the issue is that emc rate >>>>> is too low, then how can debugfs report that the rate is max? For >>>>> reference, everything scales as expected for me given this change plus >>>>> the dt change on both p2771 and p3636+p3509. 
>>>> >>>> To clarify, this broke the boot test on Tegra194 because the boot was >>>> too slow. However, this also broke the EMC test on Tegra186 because >>>> setting the frequency from the debugfs failed. So two different failures >>>> on two different devices. I am guessing the EMC test would also fail on >>>> Tegra194, but given that it does not boot, we did not get that far. >>> >>> So you're saying that even with the dt changes, this change on >>> tegra194 still does not boot before the regression test framework >>> times out? If so, I need some more details about this. I have not seen >>> issues on p2972 or p3518. For example, if I boot to android recovery >>> where I set the cpufreq governor to performance, I see emc clock rate >>> set to 2133 MHz and 1600 MHz respectively. And boot time from kernel >>> start to pixels on display is 15 seconds, give or take a couple >>> seconds. This is using the boot stack from l4t r32.7.6. >> >> Yes. The boot failure here is not a hard boot failure, but the device >> takes too long to boot and the boot test times out. And no we will not >> increase the timeout as it is there for a reason. It could well be >> because the default governor is not set to performance. If you boot with >> just using the stock 'defconfig' for ARM64 without setting the governor >> does it take longer? > > So, I checked out next-20251210, then b4 shazam'ed this series and the > matching dt series, > 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. Then built with > LLVM=1 ARCH=arm64 make defconfig > LLVM=1 ARCH=arm64 make -j33 Image nvidia/tegra194-p2972-0000.dtb > > I packaged them into an android boot image using a lightly modified > copy of Gnurou's bbinitramfs which just drops to a busybox shell. Note > that this includes no modules, and since the pcie driver is =m in > defconfig, it is not included. Then I flashed that with the l4t > r32.7.6 boot stack to p2972. I got the shell on uart after 4.275 > seconds in the kernel. Per sysfs, the cpufreq governor is schedutil > and all policies are idling at min freq, 115200. And per debugfs, the > emc clock is 800000000. All this looks to be as expected. > > I have no idea why the regression test setup is timing out. I have not > seen the issue through any of my testing. On pure mainline as per the > above paragraph, or with the patches on the android common kernel, as > per my target use case. I don't know what to do if I can't replicate > the issue. I don't suppose the flash package for the regression test > setup is something that could be released? I thought we already concluded that you did not see this because you did not have the PCIe module present in your testing? From the above its sounds like you still don't have that driver present and so you don't see the issue. I guess I am not surprised by that but I am not sure why you are now saying you have no idea why this is timing out? I thought this was understood. -- nvpublic
On Thu, Dec 11, 2025 at 1:47 AM Jon Hunter <jonathanh@nvidia.com> wrote: > > > On 10/12/2025 22:41, Aaron Kling wrote: > > On Wed, Dec 10, 2025 at 3:24 PM Jon Hunter <jonathanh@nvidia.com> wrote: > >> > >> > >> On 10/12/2025 18:32, Aaron Kling wrote: > >>> On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote: > >>>> > >>>> > >>>> On 10/12/2025 05:06, Aaron Kling wrote: > >>>> > >>>> ... > >>>> > >>>>> Let me try to iterate the potential issues I've seen stated here. If > >>>>> I'm missing anything, please fill in the blanks. > >>>>> > >>>>> 1) If this change is applied without the related dt change and the > >>>>> pcie drvier is loaded, the emc clock can become stuck at the lowest > >>>>> rate. This is caused by the pcie driver providing icc data, but > >>>>> nothing else is. So the very low requested bandwidth results in the > >>>>> emc clock being set very low. I'm not sure there is a 'fix' for this, > >>>>> beyond making sure the dt change is merged to ensure that the cpufreq > >>>>> driver provides bandwidth info, causing the emc driver to select a > >>>>> more reasonable emc clock rate. This is a similar situation to what's > >>>>> currently blocking the tegra210 actmon series. I don't think there is > >>>>> a way for the drivers to know if icc data is missing/wrong. The > >>>>> scaling is doing exactly what it's told based on the icc routing given > >>>>> in the dt. > >>>> > >>>> So this is the fundamental issue with this that must be fixed. We can't > >>>> allow the PCIe driver to slow the system down. I think that Krzysztof > >>>> suggested we need some way to determine if the necessary ICC clients are > >>>> present/registered for ICC to work. Admittedly, I have no idea if there > >>>> is a simple way to do this, but we need something like that. > >>> > >>> I'm not sure I understand how checking clients would work. Is there a > >>> mechanism for the emc driver to know if cpufreq is registered to icc > >>> in a way that works with probe deferrals, but also allows for it to be > >>> optional? > >> > >> I am not sure if such a mechanism exists either, but it seems that we > >> need something like this. > >> > >>> Alternatively if there is not, can we just accept the abi break and > >>> have this and the dt change depend on each other? I know it's not > >>> desirable or the first choice, but if the other option is to rewrite > >>> part of the icc system, then perhaps it should be an option. > >> > >> I am not sure it is an ABI break, but the default performance might be > >> worse. I am not sure if you are proposing a way to enforce the > >> dependency or just saying that there is a dependency. We can't do the > >> latter, but if there is a way for the kernel to check the dependency and > >> make the right choice, then that should work. > > > > So we can't accept that older dt's will run slower on a newer kernel > > and say that a newer dt is needed for full performance? > > > > If that's not an option, then I have no idea how to resolve this. I'm > > not greatly knowledgeable about the icc subsystem. I can try to look > > into options, but I'm not greatly optimistic about me finding one. If > > someone could suggest a concept on how to make it work, I could > > implement it. But I'm not even seeing the concept right now. > > > >>>>> 2) Jon, you report that even with both this change and the related dt > >>>>> change, that the issue is still not fixed. But then posted a log > >>>>> showing that the emc rate is set to max. 
If the issue is that emc rate > >>>>> is too low, then how can debugfs report that the rate is max? For > >>>>> reference, everything scales as expected for me given this change plus > >>>>> the dt change on both p2771 and p3636+p3509. > >>>> > >>>> To clarify, this broke the boot test on Tegra194 because the boot was > >>>> too slow. However, this also broke the EMC test on Tegra186 because > >>>> setting the frequency from the debugfs failed. So two different failures > >>>> on two different devices. I am guessing the EMC test would also fail on > >>>> Tegra194, but given that it does not boot, we did not get that far. > >>> > >>> So you're saying that even with the dt changes, this change on > >>> tegra194 still does not boot before the regression test framework > >>> times out? If so, I need some more details about this. I have not seen > >>> issues on p2972 or p3518. For example, if I boot to android recovery > >>> where I set the cpufreq governor to performance, I see emc clock rate > >>> set to 2133 MHz and 1600 MHz respectively. And boot time from kernel > >>> start to pixels on display is 15 seconds, give or take a couple > >>> seconds. This is using the boot stack from l4t r32.7.6. > >> > >> Yes. The boot failure here is not a hard boot failure, but the device > >> takes too long to boot and the boot test times out. And no we will not > >> increase the timeout as it is there for a reason. It could well be > >> because the default governor is not set to performance. If you boot with > >> just using the stock 'defconfig' for ARM64 without setting the governor > >> does it take longer? > > > > So, I checked out next-20251210, then b4 shazam'ed this series and the > > matching dt series, > > 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. Then built with > > LLVM=1 ARCH=arm64 make defconfig > > LLVM=1 ARCH=arm64 make -j33 Image nvidia/tegra194-p2972-0000.dtb > > > > I packaged them into an android boot image using a lightly modified > > copy of Gnurou's bbinitramfs which just drops to a busybox shell. Note > > that this includes no modules, and since the pcie driver is =m in > > defconfig, it is not included. Then I flashed that with the l4t > > r32.7.6 boot stack to p2972. I got the shell on uart after 4.275 > > seconds in the kernel. Per sysfs, the cpufreq governor is schedutil > > and all policies are idling at min freq, 115200. And per debugfs, the > > emc clock is 800000000. All this looks to be as expected. > > > > I have no idea why the regression test setup is timing out. I have not > > seen the issue through any of my testing. On pure mainline as per the > > above paragraph, or with the patches on the android common kernel, as > > per my target use case. I don't know what to do if I can't replicate > > the issue. I don't suppose the flash package for the regression test > > setup is something that could be released? > > I thought we already concluded that you did not see this because you did > not have the PCIe module present in your testing? From the above its > sounds like you still don't have that driver present and so you don't > see the issue. I guess I am not surprised by that but I am not sure why > you are now saying you have no idea why this is timing out? I thought > this was understood. Oh, come on... The issue is a combination of old dt AND the pcie driver. I can reproduce low emc clock with that. But then you said t194 on the regression bench was still timing out even with the new dt. And that's what I cannot reproduce. 
And then you asked me to test with pure mainline and a stock/unmodified defconfig. So I did, using -next and the two open series, but clarified what an unmodified defconfig meant. So, I modified the .config to enable the pcie driver as built-in, then reflashed. Otherwise the same as my previous post. I got the shell after 11 seconds. And clocks are still as reported before, cpu at min, emc at 800000000. Aaron
On Thu, Dec 11, 2025 at 11:39 AM Aaron Kling <webgeek1234@gmail.com> wrote:
>
> On Thu, Dec 11, 2025 at 1:47 AM Jon Hunter <jonathanh@nvidia.com> wrote:
> >
> >
> > On 10/12/2025 22:41, Aaron Kling wrote:
> > > On Wed, Dec 10, 2025 at 3:24 PM Jon Hunter <jonathanh@nvidia.com> wrote:
> > >>
> > >>
> > >> On 10/12/2025 18:32, Aaron Kling wrote:
> > >>> On Wed, Dec 10, 2025 at 9:04 AM Jon Hunter <jonathanh@nvidia.com> wrote:
> > >>>>
> > >>>>
> > >>>> On 10/12/2025 05:06, Aaron Kling wrote:
> > >>>>
> > >>>> ...
> > >>>>
> > >>>>> Let me try to iterate the potential issues I've seen stated here. If
> > >>>>> I'm missing anything, please fill in the blanks.
> > >>>>>
> > >>>>> 1) If this change is applied without the related dt change and the
> > >>>>> pcie drvier is loaded, the emc clock can become stuck at the lowest
> > >>>>> rate. This is caused by the pcie driver providing icc data, but
> > >>>>> nothing else is. So the very low requested bandwidth results in the
> > >>>>> emc clock being set very low. I'm not sure there is a 'fix' for this,
> > >>>>> beyond making sure the dt change is merged to ensure that the cpufreq
> > >>>>> driver provides bandwidth info, causing the emc driver to select a
> > >>>>> more reasonable emc clock rate. This is a similar situation to what's
> > >>>>> currently blocking the tegra210 actmon series. I don't think there is
> > >>>>> a way for the drivers to know if icc data is missing/wrong. The
> > >>>>> scaling is doing exactly what it's told based on the icc routing given
> > >>>>> in the dt.
> > >>>>
> > >>>> So this is the fundamental issue with this that must be fixed. We can't
> > >>>> allow the PCIe driver to slow the system down. I think that Krzysztof
> > >>>> suggested we need some way to determine if the necessary ICC clients are
> > >>>> present/registered for ICC to work. Admittedly, I have no idea if there
> > >>>> is a simple way to do this, but we need something like that.
> > >>>
> > >>> I'm not sure I understand how checking clients would work. Is there a
> > >>> mechanism for the emc driver to know if cpufreq is registered to icc
> > >>> in a way that works with probe deferrals, but also allows for it to be
> > >>> optional?
> > >>
> > >> I am not sure if such a mechanism exists either, but it seems that we
> > >> need something like this.
> > >>
> > >>> Alternatively if there is not, can we just accept the abi break and
> > >>> have this and the dt change depend on each other? I know it's not
> > >>> desirable or the first choice, but if the other option is to rewrite
> > >>> part of the icc system, then perhaps it should be an option.
> > >>
> > >> I am not sure it is an ABI break, but the default performance might be
> > >> worse. I am not sure if you are proposing a way to enforce the
> > >> dependency or just saying that there is a dependency. We can't do the
> > >> latter, but if there is a way for the kernel to check the dependency and
> > >> make the right choice, then that should work.
> > >
> > > So we can't accept that older dt's will run slower on a newer kernel
> > > and say that a newer dt is needed for full performance?
> > >
> > > If that's not an option, then I have no idea how to resolve this. I'm
> > > not greatly knowledgeable about the icc subsystem. I can try to look
> > > into options, but I'm not greatly optimistic about me finding one. If
> > > someone could suggest a concept on how to make it work, I could
> > > implement it. But I'm not even seeing the concept right now.
> > >
> > >>>>> 2) Jon, you report that even with both this change and the related dt
> > >>>>> change, that the issue is still not fixed. But then posted a log
> > >>>>> showing that the emc rate is set to max. If the issue is that emc rate
> > >>>>> is too low, then how can debugfs report that the rate is max? For
> > >>>>> reference, everything scales as expected for me given this change plus
> > >>>>> the dt change on both p2771 and p3636+p3509.
> > >>>>
> > >>>> To clarify, this broke the boot test on Tegra194 because the boot was
> > >>>> too slow. However, this also broke the EMC test on Tegra186 because
> > >>>> setting the frequency from the debugfs failed. So two different failures
> > >>>> on two different devices. I am guessing the EMC test would also fail on
> > >>>> Tegra194, but given that it does not boot, we did not get that far.
> > >>>
> > >>> So you're saying that even with the dt changes, this change on
> > >>> tegra194 still does not boot before the regression test framework
> > >>> times out? If so, I need some more details about this. I have not seen
> > >>> issues on p2972 or p3518. For example, if I boot to android recovery
> > >>> where I set the cpufreq governor to performance, I see emc clock rate
> > >>> set to 2133 MHz and 1600 MHz respectively. And boot time from kernel
> > >>> start to pixels on display is 15 seconds, give or take a couple
> > >>> seconds. This is using the boot stack from l4t r32.7.6.
> > >>
> > >> Yes. The boot failure here is not a hard boot failure, but the device
> > >> takes too long to boot and the boot test times out. And no we will not
> > >> increase the timeout as it is there for a reason. It could well be
> > >> because the default governor is not set to performance. If you boot with
> > >> just using the stock 'defconfig' for ARM64 without setting the governor
> > >> does it take longer?
> > >
> > > So, I checked out next-20251210, then b4 shazam'ed this series and the
> > > matching dt series,
> > > 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. Then built with
> > > LLVM=1 ARCH=arm64 make defconfig
> > > LLVM=1 ARCH=arm64 make -j33 Image nvidia/tegra194-p2972-0000.dtb
> > >
> > > I packaged them into an android boot image using a lightly modified
> > > copy of Gnurou's bbinitramfs which just drops to a busybox shell. Note
> > > that this includes no modules, and since the pcie driver is =m in
> > > defconfig, it is not included. Then I flashed that with the l4t
> > > r32.7.6 boot stack to p2972. I got the shell on uart after 4.275
> > > seconds in the kernel. Per sysfs, the cpufreq governor is schedutil
> > > and all policies are idling at min freq, 115200. And per debugfs, the
> > > emc clock is 800000000. All this looks to be as expected.
> > >
> > > I have no idea why the regression test setup is timing out. I have not
> > > seen the issue through any of my testing. On pure mainline as per the
> > > above paragraph, or with the patches on the android common kernel, as
> > > per my target use case. I don't know what to do if I can't replicate
> > > the issue. I don't suppose the flash package for the regression test
> > > setup is something that could be released?
> >
> > I thought we already concluded that you did not see this because you did
> > not have the PCIe module present in your testing? From the above its
> > sounds like you still don't have that driver present and so you don't
> > see the issue. I guess I am not surprised by that but I am not sure why
> > you are now saying you have no idea why this is timing out? I thought
> > this was understood.
>
> Oh, come on... The issue is a combination of old dt AND the pcie
> driver. I can reproduce low emc clock with that. But then you said
> t194 on the regression bench was still timing out even with the new
> dt. And that's what I cannot reproduce. And then you asked me to test
> with pure mainline and a stock/unmodified defconfig. So I did, using
> -next and the two open series, but clarified what an unmodified
> defconfig meant.
>
> So, I modified the .config to enable the pcie driver as built-in, then
> reflashed. Otherwise the same as my previous post. I got the shell
> after 11 seconds. And clocks are still as reported before, cpu at min,
> emc at 800000000.

To try to move a resolution along, let me try to enumerate the issues
again. Again, please clarify should I have something incorrect or
incomplete.

1) The primary issue is when an old dtb is used with this commit and
the pcie driver is loaded. I can reproduce this issue on t186 and
t194. If this becomes the sole remaining blocking issue, I would like
for an exception to the normal rule be considered and this merged
anyways. Since it does not cause a boot failure and distros package a
new dt normally anyways. And to my knowledge, working around this
would involve redoing part off the icc subsystem itself, a major task
in comparison.

2) T194 is reported to have low clocks even with a new dt on the
Nvidia regression bench. I cannot reproduce this, even with the pcie
driver loaded. Can this be re-verified, please? And if it still
happens, can logs from the failure be made available and/or more
information provided as to the state of the unit? Like changes to the
default defconfig, modules that get loaded, etc.

3) Setting the max clock via debugfs fails when icc has pushed the
current clock higher than the requested rate. This is a logic issue
with all tegra emc drivers that implement dfs via icc. The suggested
resolutions are to leave this as is to keep consistency with the
existing drivers, perhaps updating all later, or to update the
existing implementations in a separate series, then send a new
revision here to match. I am personally unable to verify anything
older than tegra124, however.

Aaron
On 17/12/2025 18:39, Aaron Kling wrote:

...

> To try to move a resolution along, let me try to enumerate the issues
> again. Again, please clarify should I have something incorrect or
> incomplete.
>
> 1) The primary issue is when an old dtb is used with this commit and
> the pcie driver is loaded. I can reproduce this issue on t186 and
> t194. If this becomes the sole remaining blocking issue, I would like
> for an exception to the normal rule be considered and this merged
> anyways. Since it does not cause a boot failure and distros package a
> new dt normally anyways. And to my knowledge, working around this
> would involve redoing part off the icc subsystem itself, a major task
> in comparison.
>
> 2) T194 is reported to have low clocks even with a new dt on the
> Nvidia regression bench. I cannot reproduce this, even with the pcie
> driver loaded. Can this be re-verified, please? And if it still
> happens, can logs from the failure be made available and/or more
> information provided as to the state of the unit? Like changes to the
> default defconfig, modules that get loaded, etc.

Can you list all the patches that need to be applied on top of the
current -next and I will run it through our testing to make sure I have
this correct.

> 3) Setting the max clock via debugfs fails when icc has pushed the
> current clock higher than the requested rate. This is a logic issue
> with all tegra emc drivers that implement dfs via icc. The suggested
> resolutions are to leave this as is to keep consistency with the
> existing drivers, perhaps updating all later, or to update the
> existing implementations in a separate series, then send a new
> revision here to match. I am personally unable to verify anything
> older than tegra124, however.

Thierry and I chatted about this last week and we feel that debugfs
should be able to override the current configuration. So this will need
to be addressed as well.

Jon

--
nvpublic
On Wed, Dec 17, 2025 at 12:59 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>
>
> On 17/12/2025 18:39, Aaron Kling wrote:
>
> ...
>
> > To try to move a resolution along, let me try to enumerate the issues
> > again. Again, please clarify should I have something incorrect or
> > incomplete.
> >
> > 1) The primary issue is when an old dtb is used with this commit and
> > the pcie driver is loaded. I can reproduce this issue on t186 and
> > t194. If this becomes the sole remaining blocking issue, I would like
> > for an exception to the normal rule be considered and this merged
> > anyways. Since it does not cause a boot failure and distros package a
> > new dt normally anyways. And to my knowledge, working around this
> > would involve redoing part off the icc subsystem itself, a major task
> > in comparison.
> >
> > 2) T194 is reported to have low clocks even with a new dt on the
> > Nvidia regression bench. I cannot reproduce this, even with the pcie
> > driver loaded. Can this be re-verified, please? And if it still
> > happens, can logs from the failure be made available and/or more
> > information provided as to the state of the unit? Like changes to the
> > default defconfig, modules that get loaded, etc.
>
> Can you list all the patches that need to be applied on top of the
> current -next and I will run it through our testing to make sure I have
> this correct.

This series, message id:
20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com. And the dt
series, message id:
20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. So, my build
sequence is:

git checkout next-20251217
b4 shazam 20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com
b4 shazam 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com
LLVM=1 ARCH=arm64 make defconfig
*edit .config to set CONFIG_PCIE_TEGRA194, CONFIG_PCIE_TEGRA194_HOST,
and CONFIG_PCIE_TEGRA194_EP to =y*
LLVM=1 ARCH=arm64 make olddefconfig
LLVM=1 ARCH=arm64 make -j33 Image nvidia/tegra194-p2972-0000.dtb

I then flash those with no modules, packaged with the simple ramdisk,
and I get a shell at 11.2 seconds and emc rate is 800 MHz at idle.

> > 3) Setting the max clock via debugfs fails when icc has pushed the
> > current clock higher than the requested rate. This is a logic issue
> > with all tegra emc drivers that implement dfs via icc. The suggested
> > resolutions are to leave this as is to keep consistency with the
> > existing drivers, perhaps updating all later, or to update the
> > existing implementations in a separate series, then send a new
> > revision here to match. I am personally unable to verify anything
> > older than tegra124, however.
>
> Thierry and I chatted about this last week and we feel that debugfs
> should be able to override the current configuration. So this will need
> to be addressed as well.

Alright. I will start looking at getting that logic straight, then
upload a new series for the older archs and a new revision of this.

Aaron
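[As an illustration of the debugfs override being discussed here: a minimal
sketch only, not code from this series. It reuses the rate-request
bookkeeping from the patch at the top of the thread; emc_debug_override()
is a hypothetical helper and the clamping policy shown is just one possible
choice for letting the debugfs window win over the ICC-derived floor.]

/* Caller is expected to hold emc->rate_lock, as in the series. */
static int emc_debug_override(struct tegra186_emc *emc,
			      unsigned long dbg_min, unsigned long dbg_max)
{
	unsigned long icc_min = emc->requested_rate[EMC_RATE_ICC].min_rate;
	unsigned long rate;
	int err;

	/* Honour the ICC request only as far as the debug window allows. */
	rate = clamp(icc_min, dbg_min, dbg_max);

	err = clk_set_rate(emc->clk, rate);
	if (err)
		return err;

	emc->requested_rate[EMC_RATE_DEBUG].min_rate = dbg_min;
	emc->requested_rate[EMC_RATE_DEBUG].max_rate = dbg_max;

	return 0;
}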
On 17/12/2025 20:29, Aaron Kling wrote:
> On Wed, Dec 17, 2025 at 12:59 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>>
>>
>> On 17/12/2025 18:39, Aaron Kling wrote:
>>
>> ...
>>
>>> To try to move a resolution along, let me try to enumerate the issues
>>> again. Again, please clarify should I have something incorrect or
>>> incomplete.
>>>
>>> 1) The primary issue is when an old dtb is used with this commit and
>>> the pcie driver is loaded. I can reproduce this issue on t186 and
>>> t194. If this becomes the sole remaining blocking issue, I would like
>>> for an exception to the normal rule be considered and this merged
>>> anyways. Since it does not cause a boot failure and distros package a
>>> new dt normally anyways. And to my knowledge, working around this
>>> would involve redoing part off the icc subsystem itself, a major task
>>> in comparison.
>>>
>>> 2) T194 is reported to have low clocks even with a new dt on the
>>> Nvidia regression bench. I cannot reproduce this, even with the pcie
>>> driver loaded. Can this be re-verified, please? And if it still
>>> happens, can logs from the failure be made available and/or more
>>> information provided as to the state of the unit? Like changes to the
>>> default defconfig, modules that get loaded, etc.
>>
>> Can you list all the patches that need to be applied on top of the
>> current -next and I will run it through our testing to make sure I have
>> this correct.
>
> This series, message id:
> 20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com. And the dt
> series, message id:
> 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. So, my build
> sequence is:
>
> git checkout next-20251217
> b4 shazam 20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com
> b4 shazam 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com

Thanks I added all these on top of next-20251216 (as that is the latest
I have tested) and Tegra194 fails to boot. We always include all the
modules in the rootfs that is being tested. You can see the boot log
here [0]. We are using an NFS rootfs for testing and I see a message
related to the NFS server not responding. I am guessing something is
running too slow again because the only thing I changed was adding your
patches. The test harness reports it is timing out ...

FAILED: Linux Boot Test 1
Test Owner(s): N/A
Execution Time 219.31 sec
Test TIMEOUT reached. Test did not report results in 120 secs
Percent passed so far: 0.0

>>> 3) Setting the max clock via debugfs fails when icc has pushed the
>>> current clock higher than the requested rate. This is a logic issue
>>> with all tegra emc drivers that implement dfs via icc. The suggested
>>> resolutions are to leave this as is to keep consistency with the
>>> existing drivers, perhaps updating all later, or to update the
>>> existing implementations in a separate series, then send a new
>>> revision here to match. I am personally unable to verify anything
>>> older than tegra124, however.
>>
>> Thierry and I chatted about this last week and we feel that debugfs
>> should be able to override the current configuration. So this will need
>> to be addressed as well.
>
> Alright. I will start looking at getting that logic straight, then
> upload a new series for the older archs and a new revision of this.

And just to confirm the test that sets the EMC frequency via the debugfs
also still fails.

Jon

[0] https://pastebin.com/5ghbSsu7

--
nvpublic
On Wed, Dec 17, 2025 at 3:53 PM Jon Hunter <jonathanh@nvidia.com> wrote:
>
>
> On 17/12/2025 20:29, Aaron Kling wrote:
> > On Wed, Dec 17, 2025 at 12:59 PM Jon Hunter <jonathanh@nvidia.com> wrote:
> >>
> >>
> >> On 17/12/2025 18:39, Aaron Kling wrote:
> >>
> >> ...
> >>
> >>> To try to move a resolution along, let me try to enumerate the issues
> >>> again. Again, please clarify should I have something incorrect or
> >>> incomplete.
> >>>
> >>> 1) The primary issue is when an old dtb is used with this commit and
> >>> the pcie driver is loaded. I can reproduce this issue on t186 and
> >>> t194. If this becomes the sole remaining blocking issue, I would like
> >>> for an exception to the normal rule be considered and this merged
> >>> anyways. Since it does not cause a boot failure and distros package a
> >>> new dt normally anyways. And to my knowledge, working around this
> >>> would involve redoing part off the icc subsystem itself, a major task
> >>> in comparison.
> >>>
> >>> 2) T194 is reported to have low clocks even with a new dt on the
> >>> Nvidia regression bench. I cannot reproduce this, even with the pcie
> >>> driver loaded. Can this be re-verified, please? And if it still
> >>> happens, can logs from the failure be made available and/or more
> >>> information provided as to the state of the unit? Like changes to the
> >>> default defconfig, modules that get loaded, etc.
> >>
> >> Can you list all the patches that need to be applied on top of the
> >> current -next and I will run it through our testing to make sure I have
> >> this correct.
> >
> > This series, message id:
> > 20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com. And the dt
> > series, message id:
> > 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com. So, my build
> > sequence is:
> >
> > git checkout next-20251217
> > b4 shazam 20251027-tegra186-icc-p2-v4-0-e4e4f57e2103@gmail.com
> > b4 shazam 20251021-tegra186-icc-p3-v3-0-68184ee8a89c@gmail.com
>
> Thanks I added all these on top of next-20251216 (as that is the latest
> I have tested) and Tegra194 fails to boot. We always include all the
> modules in the rootfs that is being tested. You can see the boot log
> here [0]. We are using an NFS rootfs for testing and I see a message
> related to the NFS server not responding. I am guessing something is
> running too slow again because the only thing I changed was adding your
> patches. The test harness reports it is timing out ...
>
> FAILED: Linux Boot Test 1
> Test Owner(s): N/A
> Execution Time 219.31 sec
> Test TIMEOUT reached. Test did not report results in 120 secs
> Percent passed so far: 0.0

Okay, so. Modules are in the rootfs, none get copied to the initramfs?
And the rootfs is on nfs? And for this failure, nfs never gets
mounted. So... for this case, no modules get loaded, implying that
whatever is happening is happening with the built-in drivers. Which
means this case isn't pcie related. Are there any modifications to the
defconfig? It appears that there must be, to have dwc-eth-dwmac
available. I will see if I can trigger anything when using ethernet.

If this does eventually boot to a rootfs, as implied by the comments
about debugs below, can you check to see what emc clock speed is after
boot?

> >>> 3) Setting the max clock via debugfs fails when icc has pushed the
> >>> current clock higher than the requested rate. This is a logic issue
> >>> with all tegra emc drivers that implement dfs via icc. The suggested
> >>> resolutions are to leave this as is to keep consistency with the
> >>> existing drivers, perhaps updating all later, or to update the
> >>> existing implementations in a separate series, then send a new
> >>> revision here to match. I am personally unable to verify anything
> >>> older than tegra124, however.
> >>
> >> Thierry and I chatted about this last week and we feel that debugfs
> >> should be able to override the current configuration. So this will need
> >> to be addressed as well.
> >
> > Alright. I will start looking at getting that logic straight, then
> > upload a new series for the older archs and a new revision of this.
>
> And just to confirm the test that sets the EMC frequency via the debugfs
> also still fails.
>
> Jon
>
> [0] https://pastebin.com/5ghbSsu7
>
> --
> nvpublic
>

Aaron
On 17/12/2025 22:44, Aaron Kling wrote:
...
>> Thanks I added all these on top of next-20251216 (as that is the latest
>> I have tested) and Tegra194 fails to boot. We always include all the
>> modules in the rootfs that is being tested. You can see the boot log
>> here [0]. We are using an NFS rootfs for testing and I see a message
>> related to the NFS server not responding. I am guessing something is
>> running too slow again because the only thing I changed was adding your
>> patches. The test harness reports it is timing out ...
>>
>> FAILED: Linux Boot Test 1
>> Test Owner(s): N/A
>> Execution Time 219.31 sec
>> Test TIMEOUT reached. Test did not report results in 120 secs
>> Percent passed so far: 0.0
>
> Okay, so. Modules are in the rootfs, none get copied to the initramfs?
> And the rootfs is on nfs? And for this failure, nfs never gets
> mounted. So... for this case, no modules get loaded, implying that
> whatever is happening is happening with the built-in drivers. Which
> means this case isn't pcie related. Are there any modifications to the
> defconfig? It appears that there must be, to have dwc-eth-dwmac
> available. I will see if I can trigger anything when using ethernet.
If you look at the boot log you will see ...
[ 7.839012] Root device found: nfs
[ 7.908307] Ethernet interface: eth0
[ 7.929765] IP Address: 192.168.99.2
[ 8.173978] Rootfs mounted over nfs
[ 8.306291] Switching from initrd to actual rootfs
So it does mount the rootfs and so the modules would be loaded. I
believe that PCIe is definitely loaded because that is what I observed
before. And yes there are a few modifications to the defconfig that we
make on top (that have been added over the years for various reasons) ...
CONFIG_ARM64_PMEM=y
CONFIG_BROADCOM_PHY=y
CONFIG_DWMAC_DWC_QOS_ETH=y
CONFIG_EEPROM_AT24=m
CONFIG_EXTRA_FIRMWARE="nvidia/tegra210/xusb.bin nvidia/tegra186/xusb.bin
nvidia/tegra194/xusb.bin rtl_nic/rtl8153a-3.fw rtl_nic/rtl8168h-2.fw"
CONFIG_EXTRA_FIRMWARE_DIR="${KERNEL_FW_DIR}"
CONFIG_MARVELL_PHY=y
CONFIG_R8169=y
CONFIG_RANDOMIZE_BASE=n
CONFIG_SERIAL_TEGRA_TCU=y
CONFIG_SERIAL_TEGRA_TCU_CONSOLE=y
CONFIG_STAGING=y
CONFIG_STAGING_MEDIA=y
CONFIG_STMMAC_ETH=y
CONFIG_STMMAC_PLATFORM=y
CONFIG_USB_RTL8152=y
CONFIG_VIDEO_TEGRA=m
CONFIG_VIDEO_TEGRA_TPG=y
CONFIG_DWMAC_TEGRA=y
Looking at the boot log I see ...
[ 3.854658] cpu cpu0: cpufreq_init: failed to get clk: -2
[ 3.854927] cpu cpu0: cpufreq_init: failed to get clk: -2
[ 3.855218] cpu cpu2: cpufreq_init: failed to get clk: -2
[ 3.858438] cpu cpu2: cpufreq_init: failed to get clk: -2
[ 3.863987] cpu cpu4: cpufreq_init: failed to get clk: -2
[ 3.869741] cpu cpu4: cpufreq_init: failed to get clk: -2
[ 3.875006] cpu cpu6: cpufreq_init: failed to get clk: -2
[ 3.880725] cpu cpu6: cpufreq_init: failed to get clk: -2
[ 3.886018] cpufreq-dt cpufreq-dt: failed register driver: -19
So actually, I am now wondering if this is the problem?
Jon
--
nvpublic
On Thu, Dec 18, 2025 at 5:12 AM Jon Hunter <jonathanh@nvidia.com> wrote:
>
>
> On 17/12/2025 22:44, Aaron Kling wrote:
>
> ...
>
> >> Thanks I added all these on top of next-20251216 (as that is the latest
> >> I have tested) and Tegra194 fails to boot. We always include all the
> >> modules in the rootfs that is being tested. You can see the boot log
> >> here [0]. We are using an NFS rootfs for testing and I see a message
> >> related to the NFS server not responding. I am guessing something is
> >> running too slow again because the only thing I changed was adding your
> >> patches. The test harness reports it is timing out ...
> >>
> >> FAILED: Linux Boot Test 1
> >> Test Owner(s): N/A
> >> Execution Time 219.31 sec
> >> Test TIMEOUT reached. Test did not report results in 120 secs
> >> Percent passed so far: 0.0
> >
> > Okay, so. Modules are in the rootfs, none get copied to the initramfs?
> > And the rootfs is on nfs? And for this failure, nfs never gets
> > mounted. So... for this case, no modules get loaded, implying that
> > whatever is happening is happening with the built-in drivers. Which
> > means this case isn't pcie related. Are there any modifications to the
> > defconfig? It appears that there must be, to have dwc-eth-dwmac
> > available. I will see if I can trigger anything when using ethernet.
>
> If you look at the boot log you will see ...
>
> [ 7.839012] Root device found: nfs
> [ 7.908307] Ethernet interface: eth0
> [ 7.929765] IP Address: 192.168.99.2
> [ 8.173978] Rootfs mounted over nfs
> [ 8.306291] Switching from initrd to actual rootfs
>
> So it does mount the rootfs and so the modules would be loaded. I
But the bottom of the log says:
[ 188.360095] nfs: server 192.168.99.1 not responding, still trying
So does it mount nfs and load modules, and *then* fail to talk to the
nfs server? That doesn't make any sense. And I don't see any logs from
driver probes after the rootfs line. And there's sync_state lines
stating that pcie among others isn't available.
> believe that PCIe is definitely loaded because that is what I observed
> before. And yes there are a few modifications to the defconfig that we
> make on top (that have been added over the years for various reasons) ...
>
> CONFIG_ARM64_PMEM=y
> CONFIG_BROADCOM_PHY=y
> CONFIG_DWMAC_DWC_QOS_ETH=y
> CONFIG_EEPROM_AT24=m
> CONFIG_EXTRA_FIRMWARE="nvidia/tegra210/xusb.bin nvidia/tegra186/xusb.bin
> nvidia/tegra194/xusb.bin rtl_nic/rtl8153a-3.fw rtl_nic/rtl8168h-2.fw"
> CONFIG_EXTRA_FIRMWARE_DIR="${KERNEL_FW_DIR}"
> CONFIG_MARVELL_PHY=y
> CONFIG_R8169=y
> CONFIG_RANDOMIZE_BASE=n
> CONFIG_SERIAL_TEGRA_TCU=y
> CONFIG_SERIAL_TEGRA_TCU_CONSOLE=y
> CONFIG_STAGING=y
> CONFIG_STAGING_MEDIA=y
> CONFIG_STMMAC_ETH=y
> CONFIG_STMMAC_PLATFORM=y
> CONFIG_USB_RTL8152=y
> CONFIG_VIDEO_TEGRA=m
> CONFIG_VIDEO_TEGRA_TPG=y
> CONFIG_DWMAC_TEGRA=y
I will incorporate these to a build and see if I get any different results.
> Looking at the boot log I see ...
>
> [ 3.854658] cpu cpu0: cpufreq_init: failed to get clk: -2
> [ 3.854927] cpu cpu0: cpufreq_init: failed to get clk: -2
> [ 3.855218] cpu cpu2: cpufreq_init: failed to get clk: -2
> [ 3.858438] cpu cpu2: cpufreq_init: failed to get clk: -2
> [ 3.863987] cpu cpu4: cpufreq_init: failed to get clk: -2
> [ 3.869741] cpu cpu4: cpufreq_init: failed to get clk: -2
> [ 3.875006] cpu cpu6: cpufreq_init: failed to get clk: -2
> [ 3.880725] cpu cpu6: cpufreq_init: failed to get clk: -2
> [ 3.886018] cpufreq-dt cpufreq-dt: failed register driver: -19
>
> So actually, I am now wondering if this is the problem?
These lines are from cpufreq-dt trying to manage the cpu's directly,
which it's not supposed to do. tegra194-cpufreq is supposed to manage
them. I see these lines as well, when things are operating as
expected. The real driver doesn't log anything, but the policies are
visible in sysfs. I did a little bit of digging previously to see if I
could remove the log churn, but was unable to do so. I would have to
double check to be completely sure, but I am fairly certain I saw
these lines before my changes as well. It's something that would be
good to get fixed, but I don't think it's operable here.
Aaron
On Thu, Dec 18, 2025 at 1:25 PM Aaron Kling <webgeek1234@gmail.com> wrote:
>
> On Thu, Dec 18, 2025 at 5:12 AM Jon Hunter <jonathanh@nvidia.com> wrote:
> >
> >
> > On 17/12/2025 22:44, Aaron Kling wrote:
> >
> > ...
> >
> > >> Thanks I added all these on top of next-20251216 (as that is the latest
> > >> I have tested) and Tegra194 fails to boot. We always include all the
> > >> modules in the rootfs that is being tested. You can see the boot log
> > >> here [0]. We are using an NFS rootfs for testing and I see a message
> > >> related to the NFS server not responding. I am guessing something is
> > >> running too slow again because the only thing I changed was adding your
> > >> patches. The test harness reports it is timing out ...
> > >>
> > >> FAILED: Linux Boot Test 1
> > >> Test Owner(s): N/A
> > >> Execution Time 219.31 sec
> > >> Test TIMEOUT reached. Test did not report results in 120 secs
> > >> Percent passed so far: 0.0
> > >
> > > Okay, so. Modules are in the rootfs, none get copied to the initramfs?
> > > And the rootfs is on nfs? And for this failure, nfs never gets
> > > mounted. So... for this case, no modules get loaded, implying that
> > > whatever is happening is happening with the built-in drivers. Which
> > > means this case isn't pcie related. Are there any modifications to the
> > > defconfig? It appears that there must be, to have dwc-eth-dwmac
> > > available. I will see if I can trigger anything when using ethernet.
> >
> > If you look at the boot log you will see ...
> >
> > [ 7.839012] Root device found: nfs
> > [ 7.908307] Ethernet interface: eth0
> > [ 7.929765] IP Address: 192.168.99.2
> > [ 8.173978] Rootfs mounted over nfs
> > [ 8.306291] Switching from initrd to actual rootfs
> >
> > So it does mount the rootfs and so the modules would be loaded. I
>
> But the bottom of the log says:
> [ 188.360095] nfs: server 192.168.99.1 not responding, still trying
>
> So does it mount nfs and load modules, and *then* fail to talk to the
> nfs server? That doesn't make any sense. And I don't see any logs from
> driver probes after the rootfs line. And there's sync_state lines
> stating that pcie among others isn't available.
>
> > believe that PCIe is definitely loaded because that is what I observed
> > before. And yes there are a few modifications to the defconfig that we
> > make on top (that have been added over the years for various reasons) ...
> >
> > CONFIG_ARM64_PMEM=y
> > CONFIG_BROADCOM_PHY=y
> > CONFIG_DWMAC_DWC_QOS_ETH=y
> > CONFIG_EEPROM_AT24=m
> > CONFIG_EXTRA_FIRMWARE="nvidia/tegra210/xusb.bin nvidia/tegra186/xusb.bin
> > nvidia/tegra194/xusb.bin rtl_nic/rtl8153a-3.fw rtl_nic/rtl8168h-2.fw"
> > CONFIG_EXTRA_FIRMWARE_DIR="${KERNEL_FW_DIR}"
> > CONFIG_MARVELL_PHY=y
> > CONFIG_R8169=y
> > CONFIG_RANDOMIZE_BASE=n
> > CONFIG_SERIAL_TEGRA_TCU=y
> > CONFIG_SERIAL_TEGRA_TCU_CONSOLE=y
> > CONFIG_STAGING=y
> > CONFIG_STAGING_MEDIA=y
> > CONFIG_STMMAC_ETH=y
> > CONFIG_STMMAC_PLATFORM=y
> > CONFIG_USB_RTL8152=y
> > CONFIG_VIDEO_TEGRA=m
> > CONFIG_VIDEO_TEGRA_TPG=y
> > CONFIG_DWMAC_TEGRA=y
>
> I will incorporate these to a build and see if I get any different results.
>
> > Looking at the boot log I see ...
> >
> > [ 3.854658] cpu cpu0: cpufreq_init: failed to get clk: -2
> > [ 3.854927] cpu cpu0: cpufreq_init: failed to get clk: -2
> > [ 3.855218] cpu cpu2: cpufreq_init: failed to get clk: -2
> > [ 3.858438] cpu cpu2: cpufreq_init: failed to get clk: -2
> > [ 3.863987] cpu cpu4: cpufreq_init: failed to get clk: -2
> > [ 3.869741] cpu cpu4: cpufreq_init: failed to get clk: -2
> > [ 3.875006] cpu cpu6: cpufreq_init: failed to get clk: -2
> > [ 3.880725] cpu cpu6: cpufreq_init: failed to get clk: -2
> > [ 3.886018] cpufreq-dt cpufreq-dt: failed register driver: -19
> >
> > So actually, I am now wondering if this is the problem?
>
> These lines are from cpufreq-dt trying to manage the cpu's directly,
> which it's not supposed to do. tegra194-cpufreq is supposed to manage
> them. I see these lines as well, when things are operating as
> expected. The real driver doesn't log anything, but the policies are
> visible in sysfs. I did a little bit of digging previously to see if I
> could remove the log churn, but was unable to do so. I would have to
> double check to be completely sure, but I am fairly certain I saw
> these lines before my changes as well. It's something that would be
> good to get fixed, but I don't think it's operable here.
Turns out, this is actually semi-operable. There's a blocklist in the
cpufreq-dt driver that includes all tegra archs <= t234 except for
t186 and t194. If I add t194 to that list, then the log lines go away.
However, it does not fix the nfs boot issue. I was finally able to
replicate it by setting up my own nfs rootfs. This series does not
affect it though, fwiw, it's the dt series that triggers this. Before
it, nfsroot boots as expected. After it, the reported issue happens.
After adding t194 to the cpufreq-dt blocklist, the issue still
happens. But... if I add "blacklist=cpufreq-dt" to the kernel
bootargs, nfs works again. I don't get this.
So, summary:
* Adding opp tables to the cpu nodes causes cpufreq-dt to try to
handle cpufreq for the soc
* Adding tegra194 to the cpufreq-dt-platdev blocklist stops log
messages about the attempt
* However, it still affects the ethernet driver, causing watchdog
timeouts and adapter resets
* Blacklisting the cpufreq-dt driver entirely prevents the issue
I'm not sure what to make of this. Anyone have thoughts? I will send a
patch separately to add t186 and t194 to the cpufreq-dt-platdev block
list as this needs to happen in any case.
Aaron
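[For reference, the cpufreq-dt-platdev change Aaron refers to above is a
small addition to the blocklist table in
drivers/cpufreq/cpufreq-dt-platdev.c. A rough, untested sketch of where the
entries would go; the surrounding entries are abbreviated:]

static const struct of_device_id blocklist[] __initconst = {
	/* ... existing entries ... */
	{ .compatible = "nvidia,tegra124", },
	{ .compatible = "nvidia,tegra186", },	/* new */
	{ .compatible = "nvidia,tegra194", },	/* new */
	{ .compatible = "nvidia,tegra210", },
	{ .compatible = "nvidia,tegra234", },
	/* ... */
	{ }
};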
On 18/12/2025 21:20, Aaron Kling wrote:

...

> Turns out, this is actually semi-operable. There's a blocklist in the
> cpufreq-dt driver that includes all tegra archs <= t234 except for
> t186 and t194. If I add t194 to that list, then the log lines go away.
> However, it does not fix the nfs boot issue. I was finally able to
> replicate it by setting up my own nfs rootfs. This series does not
> affect it though, fwiw, it's the dt series that triggers this. Before
> it, nfsroot boots as expected. After it, the reported issue happens.
> After adding t194 to the cpufreq-dt blocklist, the issue still
> happens. But... if I add "blacklist=cpufreq-dt" to the kernel
> bootargs, nfs works again. I don't get this.
>
> So, summary:
> * Adding opp tables to the cpu nodes causes cpufreq-dt to try to
> handle cpufreq for the soc
> * Adding tegra194 to the cpufreq-dt-platdev blocklist stops log
> messages about the attempt
> * However, it still affects the ethernet driver, causing watchdog
> timeouts and adapter resets
> * Blacklisting the cpufreq-dt driver entirely prevents the issue
>
> I'm not sure what to make of this. Anyone have thoughts? I will send a
> patch separately to add t186 and t194 to the cpufreq-dt-platdev block
> list as this needs to happen in any case.

Great glad you see the same and thanks for the summary.

Have you looked at what the CPU and EMC frequencies are doing? I still
don't understand the connection to the ethernet driver.

Have you tried setting the performance governor for CPUFREQ to see if
that works? That would tell us if the CPU speed is related.

Jon

--
nvpublic
On 12/11/2025 08:21, Aaron Kling wrote:
> On Wed, Nov 12, 2025 at 12:18 AM Jon Hunter <jonathanh@nvidia.com> wrote:
>>
>>
>> On 11/11/2025 23:17, Aaron Kling wrote:
>>
>> ...
>>
>>> Alright, I think I've got the picture of what's going on now. The
>>> standard arm64 defconfig enables the t194 pcie driver as a module. And
>>> my simple busybox ramdisk that I use for mainline regression testing
>>> isn't loading any modules. If I set the pcie driver to built-in, I
>>> replicate the issue. And I don't see the issue on my normal use case,
>>> because I have the dt changes as well.
>>>
>>> So it appears that the pcie driver submits icc bandwidth. And without
>>> cpufreq submitting bandwidth as well, the emc driver gets a very low
>>> number and thus sets a very low emc freq. The question becomes... what
>>> to do about it? If the related dt changes were submitted to
>>> linux-next, everything should fall into place. And I'm not sure where
>>> this falls on the severity scale since it doesn't full out break boot
>>> or prevent operation.
>>
>> Where are the related DT changes? If we can get these into -next and
>> lined up to be merged for v6.19, then that is fine. However, we should
>> not merge this for v6.19 without the DT changes.
>
> The dt changes are here [0].
>
> This was all part of the same series, keeping everything logically
> related together. But on v2, Krzysztof said that none of this should

I asked you about dependencies between the patches and you said there
are none, so collecting different subsystems into one is wrong. That's
nothing new, standard Linux kernel process.

What is non-standard here is keeping secret that there is impact on
users.

> have ever been together and that each subsystem should get a separate
> series, even if the changes are related. Which I did, and now this is
> split across three series. The actmon series for tegra210 is in a
> similar state. Split across four series and only one has been pulled
> to linux-next.
>
>> I will also talk with Thierry to see if he has any concerns about users
>> seeing slow performance if they don't have an up-to-date DTB.
>>
>> Is there any easy way to detect if the DTB has he necessary properties
>> to enable ICC scaling?
>
> I'm not sure there is any simple way, given how I set up tegra186 and
> tegra194. The new dt properties are on the cpu nodes, there's nothing
> new for the emc node. So the emc driver just unconditionally declares
> itself to icc. It was doing this before too, but wouldn't do anything
> on tegra186 or tegra194 because the set_bw function was just a stub
> and the real logic happened in the bpmp bw mgr, which only exists on
> tegra234+. Now the set_bw function will directly calculate and set the
> emc clock as long as the bpmp bw mgr is not supported. Offhand, I
> can't think of anything existing to check to skip this, because
> nothing new in the dt has been added in the scope of emc.

If your ICC triggers without users, I think it is usual case - you
should not enable the sync_state but instead keep it disabled till you
have all the consumers in place.

Best regards,
Krzysztof
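[For context on the sync_state suggestion: as I read the ICC core, a
provider opts in to sync_state handling by pointing its driver's sync_state
callback at icc_sync_state(); until that callback has fired for all
providers, nodes that declare an initial bandwidth keep it as a floor, so
missing consumers cannot drag the bus down. A rough sketch of that
registration, with hypothetical driver names, not taken from this series:]

#include <linux/interconnect-provider.h>
#include <linux/platform_device.h>

static int example_emc_probe(struct platform_device *pdev)
{
	/* icc_provider_init()/icc_provider_register() would go here */
	return 0;
}

static struct platform_driver example_emc_driver = {
	.probe = example_emc_probe,
	.driver = {
		.name = "example-emc",
		/* leave unset, or set conditionally, to keep the floor */
		.sync_state = icc_sync_state,
	},
};
builtin_platform_driver(example_emc_driver);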