The Adreno GPU Management Unit (GMU) can also scale the DDR
bandwidth along with the GPU frequency and power domain level,
but for now we statically fill the bw_table with values from
the downstream driver.

Only the first entry is used, which is a disable vote, so we
currently rely on scaling via the Linux interconnect paths.

Let's dynamically generate the bw_table with the vote values
previously calculated from the OPPs.

Those entries will then be used by the GMU to pass the
appropriate bandwidth level when voting for a GPU frequency.
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
---
drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 48 +++++++++++++++++++++++++++--------
1 file changed, 37 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
index cb8844ed46b29c4569d05eb7a24f7b27e173190f..9a89ba95843e7805d78f0e5ddbe328677b6431dd 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
@@ -596,22 +596,48 @@ static void a730_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
 	msg->cnoc_cmds_data[1][0] = 0x60000001;
 }
 
-static void a740_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
+static void a740_generate_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
+				   struct a6xx_hfi_msg_bw_table *msg)
 {
-	msg->bw_level_num = 1;
+	const struct a6xx_info *info = adreno_gpu->info->a6xx;
+	unsigned int i, j;
 
-	msg->ddr_cmds_num = 3;
 	msg->ddr_wait_bitmask = 0x7;
 
-	msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
-	msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
-	msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
+	for (i = 0; i < 3; i++) {
+		if (!info->bcm[i].name)
+			break;
+		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
+	}
+	msg->ddr_cmds_num = i;
 
-	msg->ddr_cmds_data[0][0] = 0x40000000;
-	msg->ddr_cmds_data[0][1] = 0x40000000;
-	msg->ddr_cmds_data[0][2] = 0x40000000;
+	for (i = 0; i < gmu->nr_gpu_bws; ++i)
+		for (j = 0; j < msg->ddr_cmds_num; j++)
+			msg->ddr_cmds_data[i][j] = gmu->gpu_bw_votes[i][j];
+	msg->bw_level_num = gmu->nr_gpu_bws;
+}
+
+static void a740_build_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
+				struct a6xx_hfi_msg_bw_table *msg)
+{
+	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
+		a740_generate_bw_table(adreno_gpu, gmu, msg);
+	} else {
+		msg->bw_level_num = 1;
 
-	/* TODO: add a proper dvfs table */
+		msg->ddr_cmds_num = 3;
+		msg->ddr_wait_bitmask = 0x7;
+
+		msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
+		msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
+		msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
+
+		msg->ddr_cmds_data[0][0] = 0x40000000;
+		msg->ddr_cmds_data[0][1] = 0x40000000;
+		msg->ddr_cmds_data[0][2] = 0x40000000;
+
+		/* TODO: add a proper dvfs table */
+	}
 
 	msg->cnoc_cmds_num = 1;
 	msg->cnoc_wait_bitmask = 0x1;
@@ -691,7 +717,7 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
 	else if (adreno_is_a730(adreno_gpu))
 		a730_build_bw_table(msg);
 	else if (adreno_is_a740_family(adreno_gpu))
-		a740_build_bw_table(msg);
+		a740_build_bw_table(adreno_gpu, gmu, msg);
 	else
 		a6xx_build_bw_table(msg);
 
--
2.34.1
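
For context, the levels in this bw_table are later picked by index through
the HFI GX_BW_PERF_VOTE message whenever the driver votes for a GPU
frequency. Below is a minimal sketch of what such a vote could look like
once a bandwidth index is plumbed through; the existing upstream
a6xx_hfi_set_freq() takes only a frequency index and hardcodes the bw
field, so the two-index signature here is an assumption, not part of this
patch.

/*
 * Sketch only: select one of the bw_table levels built above when voting
 * for a GPU frequency. The bw_index parameter is hypothetical; this patch
 * only builds the table and does not touch the vote path yet.
 */
int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, u32 freq_index, u32 bw_index)
{
	struct a6xx_hfi_gx_bw_perf_vote_cmd msg = { 0 };

	msg.ack_type = 1;	/* blocking ack from the GMU */
	msg.freq = freq_index;	/* index into the GMU perf (frequency) table */
	msg.bw = bw_index;	/* index into the bw_table built above */

	return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_GX_BW_PERF_VOTE, &msg,
				 sizeof(msg), NULL, 0);
}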
On Wed, Nov 13, 2024 at 04:48:30PM +0100, Neil Armstrong wrote:
> The Adreno GPU Management Unit (GMU) can also scale the DDR
> bandwidth along with the GPU frequency and power domain level,
> but for now we statically fill the bw_table with values from
> the downstream driver.
>
> Only the first entry is used, which is a disable vote, so we
> currently rely on scaling via the Linux interconnect paths.
>
> Let's dynamically generate the bw_table with the vote values
> previously calculated from the OPPs.

Nice to see this being worked upon. I hope the code is generic
enough so that we can use it from other adreno_foo_build_bw_table()
functions.

[...]

> +static void a740_build_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
> +				struct a6xx_hfi_msg_bw_table *msg)
> +{
> +	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
> +		a740_generate_bw_table(adreno_gpu, gmu, msg);
> +	} else {

Why do we need fallback code here?

[...]

> +		/* TODO: add a proper dvfs table */

I think the TODO is not applicable anymore.

[...]

--
With best wishes
Dmitry
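
Following up on the "generic enough" point above, here is a rough sketch of
a family-agnostic generator that other adreno_foo_build_bw_table() variants
could call, assuming every chipset's a6xx_info gains the same bcm[] array
the patch uses. The a6xx_generate_bw_table() name and the GMU_MAX_BCMS
constant (3 in the patch) are hypothetical, and whether the DDR wait
bitmask can stay 0x7 on every family would need checking against the
downstream tables.

/*
 * Sketch only: shared bw_table generator, mirroring a740_generate_bw_table()
 * from the patch but not tied to a specific GPU family.
 */
static void a6xx_generate_bw_table(const struct a6xx_info *info, struct a6xx_gmu *gmu,
				   struct a6xx_hfi_msg_bw_table *msg)
{
	unsigned int i, j;

	msg->ddr_wait_bitmask = 0x7;

	/* Resolve the RPMh address of each BCM named by the chipset info */
	for (i = 0; i < GMU_MAX_BCMS; i++) {
		if (!info->bcm[i].name)
			break;
		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
	}
	msg->ddr_cmds_num = i;

	/* One table level per GPU bandwidth OPP, one vote per BCM */
	for (i = 0; i < gmu->nr_gpu_bws; ++i)
		for (j = 0; j < msg->ddr_cmds_num; j++)
			msg->ddr_cmds_data[i][j] = gmu->gpu_bw_votes[i][j];
	msg->bw_level_num = gmu->nr_gpu_bws;
}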
On 15/11/2024 08:24, Dmitry Baryshkov wrote:
> On Wed, Nov 13, 2024 at 04:48:30PM +0100, Neil Armstrong wrote:
>> Let's dynamically generate the bw_table with the vote values
>> previously calculated from the OPPs.
>
> Nice to see this being worked upon. I hope the code is generic
> enough so that we can use it from other adreno_foo_build_bw_table()
> functions.

I would hope so, but I don't have the HW to properly test it on those
platforms.

[...]

>> +	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
>> +		a740_generate_bw_table(adreno_gpu, gmu, msg);
>> +	} else {
>
> Why do we need fallback code here?

Because at this particular commit it would generate an invalid table;
I should probably remove the fallback at the end.

[...]
On Fri, Nov 15, 2024 at 10:11:09AM +0100, Neil Armstrong wrote:
> On 15/11/2024 08:24, Dmitry Baryshkov wrote:
> > On Wed, Nov 13, 2024 at 04:48:30PM +0100, Neil Armstrong wrote:
> > > Let's dynamically generate the bw_table with the vote values
> > > previously calculated from the OPPs.
> >
> > Nice to see this being worked upon. I hope the code is generic
> > enough so that we can use it from other adreno_foo_build_bw_table()
> > functions.
>
> I would hope so, but I don't have the HW to properly test it on those
> platforms.

Welcome to the club^W Lab.

[...]

> > > +	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
> > > +		a740_generate_bw_table(adreno_gpu, gmu, msg);
> > > +	} else {
> >
> > Why do we need fallback code here?
>
> Because at this particular commit it would generate an invalid table;
> I should probably remove the fallback at the end.

Or move this to generic code that generates a table if there is no
bw data (like there is none for older platforms with the current DTs).

[...]

--
With best wishes
Dmitry
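
A minimal sketch of that suggestion, assuming the fallback sits next to a
shared generator and reuses the per-chipset bcm[] names; the helper name
and the GMU_MAX_BCMS constant are hypothetical, and 0x40000000 is simply
the commit-only (zero bandwidth) BCM vote the existing static tables
already use.

/*
 * Sketch only: generic fallback for when no per-OPP bandwidth data is
 * available (e.g. older platforms with current DTs). It emits a single
 * commit-only vote per BCM, i.e. the same "disable" level the static
 * tables use today, so bandwidth scaling keeps going through the Linux
 * interconnect paths.
 */
static void a6xx_generate_fallback_bw_table(const struct a6xx_info *info,
					    struct a6xx_hfi_msg_bw_table *msg)
{
	unsigned int i;

	msg->ddr_wait_bitmask = 0x7;

	for (i = 0; i < GMU_MAX_BCMS && info->bcm[i].name; i++) {
		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
		msg->ddr_cmds_data[0][i] = 0x40000000;
	}
	msg->ddr_cmds_num = i;

	/* Single level: the disable vote */
	msg->bw_level_num = 1;
}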