.../gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c | 35 ++++++++++++++----- 1 file changed, 26 insertions(+), 9 deletions(-)
Currently, in rcar_mipi_dsi_parameters_calc(), the VCLK divider is stored
in setup_info structure as BIT(divider). The rcar_mipi_dsi_parameters_calc()
is called at the early beginning of rcar_mipi_dsi_startup() function. Later,
in the same rcar_mipi_dsi_startup() function, the stored BIT(divider) value
is passed to __ffs() to calculate back the divider out of the value again.
Factor out VCLK divider calculation into rcar_mipi_dsi_vclk_divider()
function and call the function from both rcar_mipi_dsi_parameters_calc()
and rcar_mipi_dsi_startup() to avoid this back and forth BIT() and _ffs()
and avoid unnecessarily storing the divider value in setup_info at all.
This rework has a slight side-effect, in that it should allow the compiler
to better evaluate the code and avoid compiler warnings about variable
value overflows, which can never happen.
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512051834.bESvhDiG-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202512222321.TeY4VbvK-lkp@intel.com/
Signed-off-by: Marek Vasut <marek.vasut+renesas@mailbox.org>
---
Cc: David Airlie <airlied@gmail.com>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Cc: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Magnus Damm <magnus.damm@gmail.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com>
Cc: dri-devel@lists.freedesktop.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-renesas-soc@vger.kernel.org
---
.../gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c | 35 ++++++++++++++-----
1 file changed, 26 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
index 4ef2e3c129ed7..875945bf8255b 100644
--- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
+++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
@@ -84,7 +84,6 @@ struct dsi_setup_info {
unsigned long fout;
u16 m;
u16 n;
- u16 vclk_divider;
const struct dsi_clk_config *clkset;
};
@@ -335,10 +334,24 @@ rcar_mipi_dsi_post_init_phtw_v4h(struct rcar_mipi_dsi *dsi,
* Hardware Setup
*/
+static unsigned int rcar_mipi_dsi_vclk_divider(struct rcar_mipi_dsi *dsi,
+ struct dsi_setup_info *setup_info)
+{
+ switch (dsi->info->model) {
+ case RCAR_DSI_V3U:
+ default:
+ return (setup_info->clkset->vco_cntrl >> 4) & 0x3;
+
+ case RCAR_DSI_V4H:
+ return (setup_info->clkset->vco_cntrl >> 3) & 0x7;
+ }
+}
+
static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
unsigned long fin_rate,
unsigned long fout_target,
- struct dsi_setup_info *setup_info)
+ struct dsi_setup_info *setup_info,
+ u16 vclk_divider)
{
unsigned int best_err = -1;
const struct rcar_mipi_dsi_device_info *info = dsi->info;
@@ -360,7 +373,7 @@ static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
if (fout < info->fout_min || fout > info->fout_max)
continue;
- fout = div64_u64(fout, setup_info->vclk_divider);
+ fout = div64_u64(fout, vclk_divider);
if (fout < setup_info->clkset->min_freq ||
fout > setup_info->clkset->max_freq)
@@ -390,7 +403,9 @@ static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi,
unsigned long fout_target;
unsigned long fin_rate;
unsigned int i;
+ unsigned int div;
unsigned int err;
+ u16 vclk_divider;
/*
* Calculate Fout = dot clock * ColorDepth / (2 * Lane Count)
@@ -412,18 +427,20 @@ static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi,
fin_rate = clk_get_rate(clk);
+ div = rcar_mipi_dsi_vclk_divider(dsi, setup_info);
+
switch (dsi->info->model) {
case RCAR_DSI_V3U:
default:
- setup_info->vclk_divider = 1 << ((clk_cfg->vco_cntrl >> 4) & 0x3);
+ vclk_divider = BIT_U32(div);
break;
case RCAR_DSI_V4H:
- setup_info->vclk_divider = 1 << (((clk_cfg->vco_cntrl >> 3) & 0x7) + 1);
+ vclk_divider = BIT_U32(div + 1);
break;
}
- rcar_mipi_dsi_pll_calc(dsi, fin_rate, fout_target, setup_info);
+ rcar_mipi_dsi_pll_calc(dsi, fin_rate, fout_target, setup_info, vclk_divider);
/* Find hsfreqrange */
setup_info->hsfreq = setup_info->fout * 2;
@@ -439,7 +456,7 @@ static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi,
dev_dbg(dsi->dev,
"Fout = %u * %lu / (%u * %u * %u) = %lu (target %lu Hz, error %d.%02u%%)\n",
setup_info->m, fin_rate, dsi->info->n_mul, setup_info->n,
- setup_info->vclk_divider, setup_info->fout, fout_target,
+ vclk_divider, setup_info->fout, fout_target,
err / 100, err % 100);
dev_dbg(dsi->dev,
@@ -653,11 +670,11 @@ static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi,
switch (dsi->info->model) {
case RCAR_DSI_V3U:
default:
- vclkset |= VCLKSET_DIV_V3U(__ffs(setup_info.vclk_divider));
+ vclkset |= VCLKSET_DIV_V3U(rcar_mipi_dsi_vclk_divider(dsi, &setup_info));
break;
case RCAR_DSI_V4H:
- vclkset |= VCLKSET_DIV_V4H(__ffs(setup_info.vclk_divider) - 1);
+ vclkset |= VCLKSET_DIV_V4H(rcar_mipi_dsi_vclk_divider(dsi, &setup_info));
break;
}
--
2.51.0
Hi Marek,
On Wed, 31 Dec 2025 at 15:57, Marek Vasut
<marek.vasut+renesas@mailbox.org> wrote:
> Currently, in rcar_mipi_dsi_parameters_calc(), the VCLK divider is stored
> in setup_info structure as BIT(divider). The rcar_mipi_dsi_parameters_calc()
> is called at the early beginning of rcar_mipi_dsi_startup() function. Later,
> in the same rcar_mipi_dsi_startup() function, the stored BIT(divider) value
> is passed to __ffs() to calculate back the divider out of the value again.
>
> Factor out VCLK divider calculation into rcar_mipi_dsi_vclk_divider()
> function and call the function from both rcar_mipi_dsi_parameters_calc()
> and rcar_mipi_dsi_startup() to avoid this back and forth BIT() and _ffs()
> and avoid unnecessarily storing the divider value in setup_info at all.
>
> This rework has a slight side-effect, in that it should allow the compiler
> to better evaluate the code and avoid compiler warnings about variable
> value overflows, which can never happen.
>
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202512051834.bESvhDiG-lkp@intel.com/
> Closes: https://lore.kernel.org/oe-kbuild-all/202512222321.TeY4VbvK-lkp@intel.com/
> Signed-off-by: Marek Vasut <marek.vasut+renesas@mailbox.org>
Thanks for your patch!
> --- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
> +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
> @@ -412,18 +427,20 @@ static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi,
>
> fin_rate = clk_get_rate(clk);
>
> + div = rcar_mipi_dsi_vclk_divider(dsi, setup_info);
> +
> switch (dsi->info->model) {
> case RCAR_DSI_V3U:
> default:
> - setup_info->vclk_divider = 1 << ((clk_cfg->vco_cntrl >> 4) & 0x3);
> + vclk_divider = BIT_U32(div);
BIT_U16(), as vclk_divider is u16?
> break;
>
> case RCAR_DSI_V4H:
> - setup_info->vclk_divider = 1 << (((clk_cfg->vco_cntrl >> 3) & 0x7) + 1);
> + vclk_divider = BIT_U32(div + 1);
Likewise.
> break;
> }
>
> - rcar_mipi_dsi_pll_calc(dsi, fin_rate, fout_target, setup_info);
> + rcar_mipi_dsi_pll_calc(dsi, fin_rate, fout_target, setup_info, vclk_divider);
>
> /* Find hsfreqrange */
> setup_info->hsfreq = setup_info->fout * 2;
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
On Wed, 31 Dec 2025 15:56:10 +0100
Marek Vasut <marek.vasut+renesas@mailbox.org> wrote:
> Currently, in rcar_mipi_dsi_parameters_calc(), the VCLK divider is stored
> in setup_info structure as BIT(divider). The rcar_mipi_dsi_parameters_calc()
> is called at the early beginning of rcar_mipi_dsi_startup() function. Later,
> in the same rcar_mipi_dsi_startup() function, the stored BIT(divider) value
> is passed to __ffs() to calculate back the divider out of the value again.
>
> Factor out VCLK divider calculation into rcar_mipi_dsi_vclk_divider()
> function and call the function from both rcar_mipi_dsi_parameters_calc()
> and rcar_mipi_dsi_startup() to avoid this back and forth BIT() and _ffs()
> and avoid unnecessarily storing the divider value in setup_info at all.
>
> This rework has a slight side-effect, in that it should allow the compiler
> to better evaluate the code and avoid compiler warnings about variable
> value overflows, which can never happen.
>
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202512051834.bESvhDiG-lkp@intel.com/
> Closes: https://lore.kernel.org/oe-kbuild-all/202512222321.TeY4VbvK-lkp@intel.com/
> Signed-off-by: Marek Vasut <marek.vasut+renesas@mailbox.org>
> ---
> Cc: David Airlie <airlied@gmail.com>
> Cc: Geert Uytterhoeven <geert+renesas@glider.be>
> Cc: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
> Cc: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Magnus Damm <magnus.damm@gmail.com>
> Cc: Maxime Ripard <mripard@kernel.org>
> Cc: Simona Vetter <simona@ffwll.ch>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Tomi Valkeinen <tomi.valkeinen+renesas@ideasonboard.com>
> Cc: dri-devel@lists.freedesktop.org
> Cc: linux-kernel@vger.kernel.org
> Cc: linux-renesas-soc@vger.kernel.org
> ---
> .../gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c | 35 ++++++++++++++-----
> 1 file changed, 26 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
> index 4ef2e3c129ed7..875945bf8255b 100644
> --- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
> +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
> @@ -84,7 +84,6 @@ struct dsi_setup_info {
> unsigned long fout;
> u16 m;
> u16 n;
> - u16 vclk_divider;
> const struct dsi_clk_config *clkset;
> };
>
> @@ -335,10 +334,24 @@ rcar_mipi_dsi_post_init_phtw_v4h(struct rcar_mipi_dsi *dsi,
> * Hardware Setup
> */
>
> +static unsigned int rcar_mipi_dsi_vclk_divider(struct rcar_mipi_dsi *dsi,
> + struct dsi_setup_info *setup_info)
> +{
> + switch (dsi->info->model) {
> + case RCAR_DSI_V3U:
> + default:
> + return (setup_info->clkset->vco_cntrl >> 4) & 0x3;
> +
> + case RCAR_DSI_V4H:
> + return (setup_info->clkset->vco_cntrl >> 3) & 0x7;
> + }
> +}
> +
> static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
> unsigned long fin_rate,
> unsigned long fout_target,
> - struct dsi_setup_info *setup_info)
> + struct dsi_setup_info *setup_info,
> + u16 vclk_divider)
There is no need for this to be u16, unsigned int will generate better code.
> {
> unsigned int best_err = -1;
> const struct rcar_mipi_dsi_device_info *info = dsi->info;
> @@ -360,7 +373,7 @@ static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
> if (fout < info->fout_min || fout > info->fout_max)
> continue;
>
> - fout = div64_u64(fout, setup_info->vclk_divider);
> + fout = div64_u64(fout, vclk_divider);
Since vclk_divider is BIT_U32(div [+ 1]) this is just a shift right.
So pass the bit number instead.
>
> if (fout < setup_info->clkset->min_freq ||
> fout > setup_info->clkset->max_freq)
> @@ -390,7 +403,9 @@ static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi,
> unsigned long fout_target;
> unsigned long fin_rate;
> unsigned int i;
> + unsigned int div;
> unsigned int err;
> + u16 vclk_divider;
>
> /*
> * Calculate Fout = dot clock * ColorDepth / (2 * Lane Count)
> @@ -412,18 +427,20 @@ static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi,
>
> fin_rate = clk_get_rate(clk);
>
> + div = rcar_mipi_dsi_vclk_divider(dsi, setup_info);
> +
> switch (dsi->info->model) {
> case RCAR_DSI_V3U:
> default:
> - setup_info->vclk_divider = 1 << ((clk_cfg->vco_cntrl >> 4) & 0x3);
> + vclk_divider = BIT_U32(div);
> break;
>
> case RCAR_DSI_V4H:
> - setup_info->vclk_divider = 1 << (((clk_cfg->vco_cntrl >> 3) & 0x7) + 1);
> + vclk_divider = BIT_U32(div + 1);
> break;
> }
>
> - rcar_mipi_dsi_pll_calc(dsi, fin_rate, fout_target, setup_info);
> + rcar_mipi_dsi_pll_calc(dsi, fin_rate, fout_target, setup_info, vclk_divider);
>
> /* Find hsfreqrange */
> setup_info->hsfreq = setup_info->fout * 2;
> @@ -439,7 +456,7 @@ static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi,
> dev_dbg(dsi->dev,
> "Fout = %u * %lu / (%u * %u * %u) = %lu (target %lu Hz, error %d.%02u%%)\n",
> setup_info->m, fin_rate, dsi->info->n_mul, setup_info->n,
> - setup_info->vclk_divider, setup_info->fout, fout_target,
> + vclk_divider, setup_info->fout, fout_target,
> err / 100, err % 100);
>
> dev_dbg(dsi->dev,
> @@ -653,11 +670,11 @@ static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi,
> switch (dsi->info->model) {
> case RCAR_DSI_V3U:
> default:
> - vclkset |= VCLKSET_DIV_V3U(__ffs(setup_info.vclk_divider));
> + vclkset |= VCLKSET_DIV_V3U(rcar_mipi_dsi_vclk_divider(dsi, &setup_info));
What is going on here?
rcar_mipi_dsi_vclk_divider() is (setup_info->clkset->vco_cntrl >> 4) & 0x3
VCLKSET_DIV_V3U(n) FIELD_PREP(VCLKSET_DIV_V3U_MASK, (n))
VCLKSET_DIV_V3U_MASK is GENMASK_U32(5, 4)
Looks like a very complicated way of saying:
vclkset |= setup_info->clkset->vco_cntrl & VCLKSET_DIV_V3U_MASK;
It might be a semi-accident that the bit numbers match.
But I also suspect it is also semi-deliberate.
> break;
>
> case RCAR_DSI_V4H:
> - vclkset |= VCLKSET_DIV_V4H(__ffs(setup_info.vclk_divider) - 1);
> + vclkset |= VCLKSET_DIV_V4H(rcar_mipi_dsi_vclk_divider(dsi, &setup_info));
That one looks like it needs the 'subtract one' done somewhere.
Maybe:
vclkset |= (setup_info->clkset->vco_cntrl - (1u << 3)) &
VCLKSET_DIV_V4U_MASK;
Although you may want to write the '8' in a different (longer) way :-)
David
> break;
> }
>
On 1/1/26 12:42 PM, David Laight wrote:
Hello David,
>> static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
>> unsigned long fin_rate,
>> unsigned long fout_target,
>> - struct dsi_setup_info *setup_info)
>> + struct dsi_setup_info *setup_info,
>> + u16 vclk_divider)
>
> There is no need for this to be u16, unsigned int will generate better code.
Can you please elaborate on the "better code" part ?
>> {
>> unsigned int best_err = -1;
>> const struct rcar_mipi_dsi_device_info *info = dsi->info;
>> @@ -360,7 +373,7 @@ static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
>> if (fout < info->fout_min || fout > info->fout_max)
>> continue;
>>
>> - fout = div64_u64(fout, setup_info->vclk_divider);
>> + fout = div64_u64(fout, vclk_divider);
>
> Since vclk_divider is BIT_U32(div [+ 1]) this is just a shift right.
> So pass the bit number instead.
While I agree this is a shift in the end, it also makes the code harder
to understand. I can add the following change into V2, but I would like
input from Tomi/Laurent on whether we should really push the
optimization in that direction:
"
diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
index 875945bf8255b..c2a0c89b97d93 100644
--- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
+++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
@@ -351,7 +351,7 @@ static void rcar_mipi_dsi_pll_calc(struct
rcar_mipi_dsi *dsi,
unsigned long fin_rate,
unsigned long fout_target,
struct dsi_setup_info *setup_info,
- u16 vclk_divider)
+ unsigned int div)
{
unsigned int best_err = -1;
const struct rcar_mipi_dsi_device_info *info = dsi->info;
@@ -373,7 +373,7 @@ static void rcar_mipi_dsi_pll_calc(struct
rcar_mipi_dsi *dsi,
if (fout < info->fout_min || fout > info->fout_max)
continue;
- fout = div64_u64(fout, vclk_divider);
+ fout >>= div;
if (fout < setup_info->clkset->min_freq ||
fout > setup_info->clkset->max_freq)
@@ -405,7 +405,6 @@ static void rcar_mipi_dsi_parameters_calc(struct
rcar_mipi_dsi *dsi,
unsigned int i;
unsigned int div;
unsigned int err;
- u16 vclk_divider;
/*
* Calculate Fout = dot clock * ColorDepth / (2 * Lane Count)
@@ -429,18 +428,10 @@ static void rcar_mipi_dsi_parameters_calc(struct
rcar_mipi_dsi *dsi,
div = rcar_mipi_dsi_vclk_divider(dsi, setup_info);
- switch (dsi->info->model) {
- case RCAR_DSI_V3U:
- default:
- vclk_divider = BIT_U32(div);
- break;
-
- case RCAR_DSI_V4H:
- vclk_divider = BIT_U32(div + 1);
- break;
- }
+ if (dsi->info->model == RCAR_DSI_V4H)
+ div++;
- rcar_mipi_dsi_pll_calc(dsi, fin_rate, fout_target, setup_info,
vclk_divider);
+ rcar_mipi_dsi_pll_calc(dsi, fin_rate, fout_target, setup_info, div);
/* Find hsfreqrange */
setup_info->hsfreq = setup_info->fout * 2;
@@ -456,7 +447,7 @@ static void rcar_mipi_dsi_parameters_calc(struct
rcar_mipi_dsi *dsi,
dev_dbg(dsi->dev,
"Fout = %u * %lu / (%u * %u * %u) = %lu (target %lu Hz, error
%d.%02u%%)\n",
setup_info->m, fin_rate, dsi->info->n_mul, setup_info->n,
- vclk_divider, setup_info->fout, fout_target,
+ BIT_U32(div), setup_info->fout, fout_target,
err / 100, err % 100);
dev_dbg(dsi->dev,
"
>>
>> if (fout < setup_info->clkset->min_freq ||
>> fout > setup_info->clkset->max_freq)
>> @@ -390,7 +403,9 @@ static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi,
>> unsigned long fout_target;
>> unsigned long fin_rate;
>> unsigned int i;
>> + unsigned int div;
>> unsigned int err;
>> + u16 vclk_divider;
>>
>> /*
>> * Calculate Fout = dot clock * ColorDepth / (2 * Lane Count)
>> @@ -412,18 +427,20 @@ static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi,
>>
>> fin_rate = clk_get_rate(clk);
>>
>> + div = rcar_mipi_dsi_vclk_divider(dsi, setup_info);
>> +
>> switch (dsi->info->model) {
>> case RCAR_DSI_V3U:
>> default:
>> - setup_info->vclk_divider = 1 << ((clk_cfg->vco_cntrl >> 4) & 0x3);
>> + vclk_divider = BIT_U32(div);
>> break;
>>
>> case RCAR_DSI_V4H:
>> - setup_info->vclk_divider = 1 << (((clk_cfg->vco_cntrl >> 3) & 0x7) + 1);
>> + vclk_divider = BIT_U32(div + 1);
>> break;
>> }
>>
>> - rcar_mipi_dsi_pll_calc(dsi, fin_rate, fout_target, setup_info);
>> + rcar_mipi_dsi_pll_calc(dsi, fin_rate, fout_target, setup_info, vclk_divider);
>>
>> /* Find hsfreqrange */
>> setup_info->hsfreq = setup_info->fout * 2;
>> @@ -439,7 +456,7 @@ static void rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi,
>> dev_dbg(dsi->dev,
>> "Fout = %u * %lu / (%u * %u * %u) = %lu (target %lu Hz, error %d.%02u%%)\n",
>> setup_info->m, fin_rate, dsi->info->n_mul, setup_info->n,
>> - setup_info->vclk_divider, setup_info->fout, fout_target,
>> + vclk_divider, setup_info->fout, fout_target,
>> err / 100, err % 100);
>>
>> dev_dbg(dsi->dev,
>> @@ -653,11 +670,11 @@ static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi,
>> switch (dsi->info->model) {
>> case RCAR_DSI_V3U:
>> default:
>> - vclkset |= VCLKSET_DIV_V3U(__ffs(setup_info.vclk_divider));
>> + vclkset |= VCLKSET_DIV_V3U(rcar_mipi_dsi_vclk_divider(dsi, &setup_info));
>
> What is going on here?
> rcar_mipi_dsi_vclk_divider() is (setup_info->clkset->vco_cntrl >> 4) & 0x3
> VCLKSET_DIV_V3U(n) FIELD_PREP(VCLKSET_DIV_V3U_MASK, (n))
> VCLKSET_DIV_V3U_MASK is GENMASK_U32(5, 4)
> Looks like a very complicated way of saying:
> vclkset |= setup_info->clkset->vco_cntrl & VCLKSET_DIV_V3U_MASK;
>
> It might be a semi-accident that the bit numbers match.
> But I also suspect it is also semi-deliberate.
The bitfield placement does match here, it does not match for the V4H
below anymore. The vco_cntrl is a precalculated value for a bitfield
that is written into another register, this bitfield here uses a subset
of it and must be kept in sync with the other register. The FIELD_PREP
usage here is correct.
>> break;
>>
>> case RCAR_DSI_V4H:
>> - vclkset |= VCLKSET_DIV_V4H(__ffs(setup_info.vclk_divider) - 1);
>> + vclkset |= VCLKSET_DIV_V4H(rcar_mipi_dsi_vclk_divider(dsi, &setup_info));
>
> That one looks like it needs the 'subtract one' done somewhere.
> Maybe:
> vclkset |= (setup_info->clkset->vco_cntrl - (1u << 3)) &
> VCLKSET_DIV_V4U_MASK;
> Although you may want to write the '8' in a different (longer) way :-)
Please see hunk "@@ -412,18 +427,20 @@ static void
rcar_mipi_dsi_parameters_calc(struct rcar_mipi_dsi *dsi," above, that
one removes the +1 part, so the -1 has to be removed here too.
Thanks
On Thu, 1 Jan 2026 22:26:34 +0100
Marek Vasut <marek.vasut@mailbox.org> wrote:
> On 1/1/26 12:42 PM, David Laight wrote:
>
> Hello David,
>
> >> static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
> >> unsigned long fin_rate,
> >> unsigned long fout_target,
> >> - struct dsi_setup_info *setup_info)
> >> + struct dsi_setup_info *setup_info,
> >> + u16 vclk_divider)
> >
> > There is no need for this to be u16, unsigned int will generate better code.
>
> Can you please elaborate on the "better code" part ?
Basically the compiler has to generate extra code to ensure the value
doesn't exceed 65535.
So there will be a mask of the function parameter (passes in a 32bit register).
Any arithmetic needs similar masking of the result.
You won't see the latter (as much) on x86 (or m68k) because the compiler
can use the C 'as if' rule and use the cpu's 8/16 bit registers and alu.
But pretty much no other cpu can do that, so extra instructions are needed
to stop the value (in a 32bit register) exceeding the limit for the variable.
Remember that while C variables can be char/short the values they contain
are promoted to 'signed int' before (pretty much) anything is done with
the value, any calculated value is then truncated before being assigned back.
For on-stack values this is fine - but modern compilers was to keep values
in registers as much as possible.
>
> >> {
> >> unsigned int best_err = -1;
> >> const struct rcar_mipi_dsi_device_info *info = dsi->info;
> >> @@ -360,7 +373,7 @@ static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
> >> if (fout < info->fout_min || fout > info->fout_max)
> >> continue;
> >>
> >> - fout = div64_u64(fout, setup_info->vclk_divider);
> >> + fout = div64_u64(fout, vclk_divider);
> >
> > Since vclk_divider is BIT_U32(div [+ 1]) this is just a shift right.
> > So pass the bit number instead.
>
> While I agree this is a shift in the end, it also makes the code harder
> to understand. I can add the following change into V2, but I would like
> input from Tomi/Laurent on whether we should really push the
> optimization in that direction:
The shift really is a lot faster.
Even on 64bit x86 cpus it can be slow - 35-88 clocks prior to 'Cannon Lake'.
AMD cpu get better with Zen3, and using a 64bit divide with 32bits values
doesn't slow things down (it does on the Intel cpu).
Don't even think about what happens on 32bit cpus.
I don't know about other architectures.
Just comment as 'x >> n' rather than 'x / (1 << n)'
David
On 1/2/26 12:13 AM, David Laight wrote:
> On Thu, 1 Jan 2026 22:26:34 +0100
> Marek Vasut <marek.vasut@mailbox.org> wrote:
>
>> On 1/1/26 12:42 PM, David Laight wrote:
>>
>> Hello David,
>>
>>>> static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
>>>> unsigned long fin_rate,
>>>> unsigned long fout_target,
>>>> - struct dsi_setup_info *setup_info)
>>>> + struct dsi_setup_info *setup_info,
>>>> + u16 vclk_divider)
>>>
>>> There is no need for this to be u16, unsigned int will generate better code.
>>
>> Can you please elaborate on the "better code" part ?
>
> Basically the compiler has to generate extra code to ensure the value
> doesn't exceed 65535.
> So there will be a mask of the function parameter (passes in a 32bit register).
> Any arithmetic needs similar masking of the result.
> You won't see the latter (as much) on x86 (or m68k) because the compiler
> can use the C 'as if' rule and use the cpu's 8/16 bit registers and alu.
> But pretty much no other cpu can do that, so extra instructions are needed
> to stop the value (in a 32bit register) exceeding the limit for the variable.
>
> Remember that while C variables can be char/short the values they contain
> are promoted to 'signed int' before (pretty much) anything is done with
> the value, any calculated value is then truncated before being assigned back.
> For on-stack values this is fine - but modern compilers was to keep values
> in registers as much as possible.
>
>>
>>>> {
>>>> unsigned int best_err = -1;
>>>> const struct rcar_mipi_dsi_device_info *info = dsi->info;
>>>> @@ -360,7 +373,7 @@ static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
>>>> if (fout < info->fout_min || fout > info->fout_max)
>>>> continue;
>>>>
>>>> - fout = div64_u64(fout, setup_info->vclk_divider);
>>>> + fout = div64_u64(fout, vclk_divider);
>>>
>>> Since vclk_divider is BIT_U32(div [+ 1]) this is just a shift right.
>>> So pass the bit number instead.
>>
>> While I agree this is a shift in the end, it also makes the code harder
>> to understand. I can add the following change into V2, but I would like
>> input from Tomi/Laurent on whether we should really push the
>> optimization in that direction:
>
> The shift really is a lot faster.
> Even on 64bit x86 cpus it can be slow - 35-88 clocks prior to 'Cannon Lake'.
> AMD cpu get better with Zen3, and using a 64bit divide with 32bits values
> doesn't slow things down (it does on the Intel cpu).
> Don't even think about what happens on 32bit cpus.
> I don't know about other architectures.
> Just comment as 'x >> n' rather than 'x / (1 << n)'
Please note that this code is built primarily for arm64 , so discussing
x86 or m68k optimizations doesn't make much sense here.
On Fri, 2 Jan 2026 00:25:54 +0100
Marek Vasut <marek.vasut@mailbox.org> wrote:
> On 1/2/26 12:13 AM, David Laight wrote:
> > On Thu, 1 Jan 2026 22:26:34 +0100
> > Marek Vasut <marek.vasut@mailbox.org> wrote:
> >
> >> On 1/1/26 12:42 PM, David Laight wrote:
> >>
> >> Hello David,
> >>
> >>>> static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
> >>>> unsigned long fin_rate,
> >>>> unsigned long fout_target,
> >>>> - struct dsi_setup_info *setup_info)
> >>>> + struct dsi_setup_info *setup_info,
> >>>> + u16 vclk_divider)
> >>>
> >>> There is no need for this to be u16, unsigned int will generate better code.
> >>
> >> Can you please elaborate on the "better code" part ?
> >
> > Basically the compiler has to generate extra code to ensure the value
> > doesn't exceed 65535.
> > So there will be a mask of the function parameter (passes in a 32bit register).
> > Any arithmetic needs similar masking of the result.
> > You won't see the latter (as much) on x86 (or m68k) because the compiler
> > can use the C 'as if' rule and use the cpu's 8/16 bit registers and alu.
> > But pretty much no other cpu can do that, so extra instructions are needed
> > to stop the value (in a 32bit register) exceeding the limit for the variable.
> >
> > Remember that while C variables can be char/short the values they contain
> > are promoted to 'signed int' before (pretty much) anything is done with
> > the value, any calculated value is then truncated before being assigned back.
> > For on-stack values this is fine - but modern compilers was to keep values
> > in registers as much as possible.
> >
> >>
> >>>> {
> >>>> unsigned int best_err = -1;
> >>>> const struct rcar_mipi_dsi_device_info *info = dsi->info;
> >>>> @@ -360,7 +373,7 @@ static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
> >>>> if (fout < info->fout_min || fout > info->fout_max)
> >>>> continue;
> >>>>
> >>>> - fout = div64_u64(fout, setup_info->vclk_divider);
> >>>> + fout = div64_u64(fout, vclk_divider);
> >>>
> >>> Since vclk_divider is BIT_U32(div [+ 1]) this is just a shift right.
> >>> So pass the bit number instead.
> >>
> >> While I agree this is a shift in the end, it also makes the code harder
> >> to understand. I can add the following change into V2, but I would like
> >> input from Tomi/Laurent on whether we should really push the
> >> optimization in that direction:
> >
> > The shift really is a lot faster.
> > Even on 64bit x86 cpus it can be slow - 35-88 clocks prior to 'Cannon Lake'.
> > AMD cpu get better with Zen3, and using a 64bit divide with 32bits values
> > doesn't slow things down (it does on the Intel cpu).
> > Don't even think about what happens on 32bit cpus.
> > I don't know about other architectures.
> > Just comment as 'x >> n' rather than 'x / (1 << n)'
>
> Please note that this code is built primarily for arm64 , so discussing
> x86 or m68k optimizations doesn't make much sense here.
ARM definitely only has 32 and 64bit maths - so you will see masking
instructions for arithmetic on char/short values (unless the compiler
can tell that the values cannot get too large.)
Divide performance is cpu dependant - the only arm cpu I've used only
had software divide (but a fast barrel shifter).
If you think that Intel haven't thrown much silicon at integer divide
it isn't that likely that arm will have a divide that is much faster
than 1 clock for each bit in the quotient.
(Of course I might be wrong...)
David
Hi,
On 02/01/2026 01:44, David Laight wrote:
> On Fri, 2 Jan 2026 00:25:54 +0100
> Marek Vasut <marek.vasut@mailbox.org> wrote:
>
>> On 1/2/26 12:13 AM, David Laight wrote:
>>> On Thu, 1 Jan 2026 22:26:34 +0100
>>> Marek Vasut <marek.vasut@mailbox.org> wrote:
>>>
>>>> On 1/1/26 12:42 PM, David Laight wrote:
>>>>
>>>> Hello David,
>>>>
>>>>>> static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
>>>>>> unsigned long fin_rate,
>>>>>> unsigned long fout_target,
>>>>>> - struct dsi_setup_info *setup_info)
>>>>>> + struct dsi_setup_info *setup_info,
>>>>>> + u16 vclk_divider)
>>>>>
>>>>> There is no need for this to be u16, unsigned int will generate better code.
>>>>
>>>> Can you please elaborate on the "better code" part ?
>>>
>>> Basically the compiler has to generate extra code to ensure the value
>>> doesn't exceed 65535.
>>> So there will be a mask of the function parameter (passes in a 32bit register).
>>> Any arithmetic needs similar masking of the result.
>>> You won't see the latter (as much) on x86 (or m68k) because the compiler
>>> can use the C 'as if' rule and use the cpu's 8/16 bit registers and alu.
>>> But pretty much no other cpu can do that, so extra instructions are needed
>>> to stop the value (in a 32bit register) exceeding the limit for the variable.
>>>
>>> Remember that while C variables can be char/short the values they contain
>>> are promoted to 'signed int' before (pretty much) anything is done with
>>> the value, any calculated value is then truncated before being assigned back.
>>> For on-stack values this is fine - but modern compilers was to keep values
>>> in registers as much as possible.
>>>
>>>>
>>>>>> {
>>>>>> unsigned int best_err = -1;
>>>>>> const struct rcar_mipi_dsi_device_info *info = dsi->info;
>>>>>> @@ -360,7 +373,7 @@ static void rcar_mipi_dsi_pll_calc(struct rcar_mipi_dsi *dsi,
>>>>>> if (fout < info->fout_min || fout > info->fout_max)
>>>>>> continue;
>>>>>>
>>>>>> - fout = div64_u64(fout, setup_info->vclk_divider);
>>>>>> + fout = div64_u64(fout, vclk_divider);
>>>>>
>>>>> Since vclk_divider is BIT_U32(div [+ 1]) this is just a shift right.
>>>>> So pass the bit number instead.
>>>>
>>>> While I agree this is a shift in the end, it also makes the code harder
>>>> to understand. I can add the following change into V2, but I would like
>>>> input from Tomi/Laurent on whether we should really push the
>>>> optimization in that direction:
>>>
>>> The shift really is a lot faster.
>>> Even on 64bit x86 cpus it can be slow - 35-88 clocks prior to 'Cannon Lake'.
>>> AMD cpu get better with Zen3, and using a 64bit divide with 32bits values
>>> doesn't slow things down (it does on the Intel cpu).
>>> Don't even think about what happens on 32bit cpus.
>>> I don't know about other architectures.
>>> Just comment as 'x >> n' rather than 'x / (1 << n)'
>>
>> Please note that this code is built primarily for arm64 , so discussing
>> x86 or m68k optimizations doesn't make much sense here.
>
> ARM definitely only has 32 and 64bit maths - so you will see masking
> instructions for arithmetic on char/short values (unless the compiler
> can tell that the values cannot get too large.)
>
> Divide performance is cpu dependant - the only arm cpu I've used only
> had software divide (but a fast barrel shifter).
> If you think that Intel haven't thrown much silicon at integer divide
> it isn't that likely that arm will have a divide that is much faster
> than 1 clock for each bit in the quotient.
> (Of course I might be wrong...)
The division is done once when enabling the display? If so, I'd
prioritize readability. That said, division done with shift if quite
common, so I'm not sure if it would be that bad with a few comments.
Tomi
© 2016 - 2026 Red Hat, Inc.