drivers/iio/industrialio-gts-helper.c | 250 +++++++++++++++----------- 1 file changed, 149 insertions(+), 101 deletions(-)
Make available scale building more clear. This hurts the performance
quite a bit by looping throgh the scales many times instead of doing
everything in one loop. It however simplifies logic by:
- decoupling the gain and scale allocations & computations
- keeping the temporary 'per_time_gains' table inside the
per_time_scales computation function.
- separating building the 'all scales' table in own function and doing
it based on the already computed per-time scales.
Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
---
In my (not always) humble (enough) opinion:
- Building the available scales tables was confusing.
- The result of this patch looks much clearer and is simpler to follow.
- Handles memory allocations and freeing in somehow easyish to follow
manner while still:
- Avoids introducing mid-function variables
- Avoids mixing and matching 'scoped' allocs with regular ones
I however send this as an RFC because it hurts the performance quite a
bit. (No measurements done, I doubt exact numbers matter. I'd just say
it more than doubles the time, prbably triples or quadruples). Well, it
is not really on a hot path though, tables are computed once at
start-up, and with a sane amount of gains/times this is likely not a
real problem.
This has been tested only by running the kunit tests for the gts-helpers
in a beaglebone black. Further testing and eyeing is appreciated :)
---
drivers/iio/industrialio-gts-helper.c | 250 +++++++++++++++-----------
1 file changed, 149 insertions(+), 101 deletions(-)
diff --git a/drivers/iio/industrialio-gts-helper.c b/drivers/iio/industrialio-gts-helper.c
index 291c0fc332c9..01206bc3e48e 100644
--- a/drivers/iio/industrialio-gts-helper.c
+++ b/drivers/iio/industrialio-gts-helper.c
@@ -160,16 +160,108 @@ static void iio_gts_purge_avail_scale_table(struct iio_gts *gts)
gts->num_avail_all_scales = 0;
}
+static int scale_eq(int *sc1, int *sc2)
+{
+ return sc1[0] == sc2[0] && sc1[1] == sc2[1];
+}
+
+static int scale_smaller(int *sc1, int *sc2)
+{
+ if (sc1[0] != sc2[0])
+ return sc1[0] < sc2[0];
+
+ /* If integer parts are equal, fixp parts */
+ return sc1[1] < sc2[1];
+}
+
+static int do_combined_scaletable(struct iio_gts *gts, size_t scale_bytes)
+{
+ int t_idx, i, new_idx;
+ int **scales = gts->per_time_avail_scale_tables;
+ int *all_scales = kcalloc(gts->num_itime, scale_bytes, GFP_KERNEL);
+
+ if (!all_scales)
+ return -ENOMEM;
+
+ t_idx = gts->num_itime - 1;
+ memcpy(all_scales, scales[t_idx], scale_bytes);
+ new_idx = gts->num_hwgain * 2;
+
+ while (t_idx-- > 0) {
+ for (i = 0; i < gts->num_hwgain ; i++) {
+ int *candidate = &scales[t_idx][i * 2];
+ int chk;
+
+ if (scale_smaller(candidate, &all_scales[new_idx - 2])) {
+ all_scales[new_idx] = candidate[0];
+ all_scales[new_idx + 1] = candidate[1];
+ new_idx += 2;
+
+ continue;
+ }
+ for (chk = 0; chk < new_idx; chk += 2)
+ if (!scale_smaller(candidate, &all_scales[chk]))
+ break;
+
+
+ if (scale_eq(candidate, &all_scales[chk]))
+ continue;
+
+ memmove(&all_scales[chk + 2], &all_scales[chk],
+ (new_idx - chk) * sizeof(int));
+ all_scales[chk] = candidate[0];
+ all_scales[chk + 1] = candidate[1];
+ new_idx += 2;
+ }
+ }
+
+ gts->num_avail_all_scales = new_idx / 2;
+ gts->avail_all_scales_table = all_scales;
+
+ return 0;
+}
+
+static void iio_gts_free_int_table_array(int **arr, int num_tables)
+{
+ int i;
+
+ for (i = 0; i < num_tables; i++)
+ kfree(arr[i]);
+
+ kfree(arr);
+}
+
+static int iio_gts_alloc_int_table_array(int ***arr, int num_tables, int num_table_items)
+{
+ int i, **tmp;
+
+ tmp = kcalloc(num_tables, sizeof(**arr), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ for (i = 0; i < num_tables; i++) {
+ tmp[i] = kcalloc(num_table_items, sizeof(int), GFP_KERNEL);
+ if (!tmp[i])
+ goto err_free;
+ }
+
+ *arr = tmp;
+
+ return 0;
+err_free:
+ iio_gts_free_int_table_array(tmp, i);
+
+ return -ENOMEM;
+}
+
static int iio_gts_gain_cmp(const void *a, const void *b)
{
return *(int *)a - *(int *)b;
}
-static int gain_to_scaletables(struct iio_gts *gts, int **gains, int **scales)
+static int fill_and_sort_scaletables(struct iio_gts *gts, int **gains, int **scales)
{
- int i, j, new_idx, time_idx, ret = 0;
- int *all_gains;
- size_t gain_bytes;
+ int i, j, ret;
for (i = 0; i < gts->num_itime; i++) {
/*
@@ -189,73 +281,59 @@ static int gain_to_scaletables(struct iio_gts *gts, int **gains, int **scales)
}
}
- gain_bytes = array_size(gts->num_hwgain, sizeof(int));
- all_gains = kcalloc(gts->num_itime, gain_bytes, GFP_KERNEL);
- if (!all_gains)
- return -ENOMEM;
+ return 0;
+}
- /*
- * We assume all the gains for same integration time were unique.
- * It is likely the first time table had greatest time multiplier as
- * the times are in the order of preference and greater times are
- * usually preferred. Hence we start from the last table which is likely
- * to have the smallest total gains.
- */
- time_idx = gts->num_itime - 1;
- memcpy(all_gains, gains[time_idx], gain_bytes);
- new_idx = gts->num_hwgain;
+static void compute_per_time_gains(struct iio_gts *gts, int **gains)
+{
+ int i, j;
- while (time_idx-- > 0) {
- for (j = 0; j < gts->num_hwgain; j++) {
- int candidate = gains[time_idx][j];
- int chk;
+ for (i = 0; i < gts->num_itime; i++) {
+ for (j = 0; j < gts->num_hwgain; j++)
+ gains[i][j] = gts->hwgain_table[j].gain *
+ gts->itime_table[i].mul;
+ }
+}
- if (candidate > all_gains[new_idx - 1]) {
- all_gains[new_idx] = candidate;
- new_idx++;
+static int compute_per_time_tables(struct iio_gts *gts, int **scales)
+{
+ int **per_time_gains;
+ int ret;
- continue;
- }
- for (chk = 0; chk < new_idx; chk++)
- if (candidate <= all_gains[chk])
- break;
+ ret = iio_gts_alloc_int_table_array(&per_time_gains, gts->num_itime,
+ gts->num_hwgain);
+ if (ret)
+ return ret;
- if (candidate == all_gains[chk])
- continue;
+ compute_per_time_gains(gts, per_time_gains);
- memmove(&all_gains[chk + 1], &all_gains[chk],
- (new_idx - chk) * sizeof(int));
- all_gains[chk] = candidate;
- new_idx++;
- }
- }
+ ret = fill_and_sort_scaletables(gts, per_time_gains, scales);
- gts->avail_all_scales_table = kcalloc(new_idx, 2 * sizeof(int),
- GFP_KERNEL);
- if (!gts->avail_all_scales_table) {
- ret = -ENOMEM;
- goto free_out;
- }
- gts->num_avail_all_scales = new_idx;
+ iio_gts_free_int_table_array(per_time_gains, gts->num_itime);
- for (i = 0; i < gts->num_avail_all_scales; i++) {
- ret = iio_gts_total_gain_to_scale(gts, all_gains[i],
- >s->avail_all_scales_table[i * 2],
- >s->avail_all_scales_table[i * 2 + 1]);
+ return ret;
+}
- if (ret) {
- kfree(gts->avail_all_scales_table);
- gts->num_avail_all_scales = 0;
- goto free_out;
- }
- }
+static int **create_per_time_scales(struct iio_gts *gts)
+{
+ int **per_time_scales, ret;
-free_out:
- kfree(all_gains);
+ ret = iio_gts_alloc_int_table_array(&per_time_scales, gts->num_itime,
+ gts->num_hwgain * 2);
+ if (ret)
+ return ERR_PTR(ret);
- return ret;
-}
+ ret = compute_per_time_tables(gts, per_time_scales);
+ if (ret)
+ goto err_out;
+
+ return per_time_scales;
+err_out:
+ iio_gts_free_int_table_array(per_time_scales, gts->num_itime);
+
+ return ERR_PTR(ret);
+}
/**
* iio_gts_build_avail_scale_table - create tables of available scales
* @gts: Gain time scale descriptor
@@ -275,55 +353,25 @@ static int gain_to_scaletables(struct iio_gts *gts, int **gains, int **scales)
*/
static int iio_gts_build_avail_scale_table(struct iio_gts *gts)
{
- int **per_time_gains, **per_time_scales, i, j, ret = -ENOMEM;
-
- per_time_gains = kcalloc(gts->num_itime, sizeof(*per_time_gains), GFP_KERNEL);
- if (!per_time_gains)
- return ret;
-
- per_time_scales = kcalloc(gts->num_itime, sizeof(*per_time_scales), GFP_KERNEL);
- if (!per_time_scales)
- goto free_gains;
-
- for (i = 0; i < gts->num_itime; i++) {
- per_time_scales[i] = kcalloc(gts->num_hwgain, 2 * sizeof(int),
- GFP_KERNEL);
- if (!per_time_scales[i])
- goto err_free_out;
-
- per_time_gains[i] = kcalloc(gts->num_hwgain, sizeof(int),
- GFP_KERNEL);
- if (!per_time_gains[i]) {
- kfree(per_time_scales[i]);
- goto err_free_out;
- }
+ int ret, scale_bytes;
+ int **per_time_scales;
- for (j = 0; j < gts->num_hwgain; j++)
- per_time_gains[i][j] = gts->hwgain_table[j].gain *
- gts->itime_table[i].mul;
- }
+ if (unlikely(check_mul_overflow(gts->num_hwgain, 2 * sizeof(int), &scale_bytes)))
+ return -EOVERFLOW;
- ret = gain_to_scaletables(gts, per_time_gains, per_time_scales);
- if (ret)
- goto err_free_out;
+ per_time_scales = create_per_time_scales(gts);
+ if (IS_ERR(per_time_scales))
+ return PTR_ERR(per_time_scales);
- for (i = 0; i < gts->num_itime; i++)
- kfree(per_time_gains[i]);
- kfree(per_time_gains);
gts->per_time_avail_scale_tables = per_time_scales;
- return 0;
-
-err_free_out:
- for (i--; i >= 0; i--) {
- kfree(per_time_scales[i]);
- kfree(per_time_gains[i]);
+ ret = do_combined_scaletable(gts, scale_bytes);
+ if (ret) {
+ iio_gts_free_int_table_array(per_time_scales, gts->num_itime);
+ return ret;
}
- kfree(per_time_scales);
-free_gains:
- kfree(per_time_gains);
- return ret;
+ return 0;
}
static void iio_gts_us_to_int_micro(int *time_us, int *int_micro_times,
base-commit: 05ff9c9c53c643551fe08fe52bd714310b9afc2e
--
2.47.0
On Mon, 9 Dec 2024 09:58:41 +0200
Matti Vaittinen <mazziesaccount@gmail.com> wrote:
> Make available scale building more clear. This hurts the performance
> quite a bit by looping throgh the scales many times instead of doing
> everything in one loop. It however simplifies logic by:
> - decoupling the gain and scale allocations & computations
> - keeping the temporary 'per_time_gains' table inside the
> per_time_scales computation function.
> - separating building the 'all scales' table in own function and doing
> it based on the already computed per-time scales.
>
> Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Hi Matti,
I'm definitely keen to see easier to follow code and agree that the
cost doesn't matter (Within reason).
I think a few more comments and rethinks of function names would
make it clearer still. If each subfunction called has a clear
statement of what it's inputs and outputs are that would help
a lot as sort functions in particular tend to be tricky to figure out
by eyeballing them.
Jonathan
>
> ---
> In my (not always) humble (enough) opinion:
> - Building the available scales tables was confusing.
> - The result of this patch looks much clearer and is simpler to follow.
> - Handles memory allocations and freeing in somehow easyish to follow
> manner while still:
> - Avoids introducing mid-function variables
> - Avoids mixing and matching 'scoped' allocs with regular ones
>
> I however send this as an RFC because it hurts the performance quite a
> bit. (No measurements done, I doubt exact numbers matter. I'd just say
> it more than doubles the time, prbably triples or quadruples). Well, it
> is not really on a hot path though, tables are computed once at
> start-up, and with a sane amount of gains/times this is likely not a
> real problem.
>
> This has been tested only by running the kunit tests for the gts-helpers
> in a beaglebone black. Further testing and eyeing is appreciated :)
> ---
> drivers/iio/industrialio-gts-helper.c | 250 +++++++++++++++-----------
> 1 file changed, 149 insertions(+), 101 deletions(-)
>
> diff --git a/drivers/iio/industrialio-gts-helper.c b/drivers/iio/industrialio-gts-helper.c
> index 291c0fc332c9..01206bc3e48e 100644
> --- a/drivers/iio/industrialio-gts-helper.c
> +++ b/drivers/iio/industrialio-gts-helper.c
> @@ -160,16 +160,108 @@ static void iio_gts_purge_avail_scale_table(struct iio_gts *gts)
> gts->num_avail_all_scales = 0;
> }
> +
> +static int do_combined_scaletable(struct iio_gts *gts, size_t scale_bytes)
Probably name this to indicate what it is doing to the combined scaletable.
Maybe make it clear that scale_bytes is of the whole scale table (i think!)
scale_table_bytes.
A few comments might also be useful.
> +{
> + int t_idx, i, new_idx;
> + int **scales = gts->per_time_avail_scale_tables;
> + int *all_scales = kcalloc(gts->num_itime, scale_bytes, GFP_KERNEL);
> +
> + if (!all_scales)
> + return -ENOMEM;
> +
> + t_idx = gts->num_itime - 1;
> + memcpy(all_scales, scales[t_idx], scale_bytes);
I'm not 100% sure what that is copying in, so maybe a comment.
Is it just filling the final integration time with the unadjusted
scale table? If so, maybe say why.
> + new_idx = gts->num_hwgain * 2;
Comment on where you are starting the index. One row into a matrix?
> +
> + while (t_idx-- > 0) {
> + for (i = 0; i < gts->num_hwgain ; i++) {
> + int *candidate = &scales[t_idx][i * 2];
> + int chk;
> +
> + if (scale_smaller(candidate, &all_scales[new_idx - 2])) {
> + all_scales[new_idx] = candidate[0];
> + all_scales[new_idx + 1] = candidate[1];
> + new_idx += 2;
> +
> + continue;
> + }
> + for (chk = 0; chk < new_idx; chk += 2)
> + if (!scale_smaller(candidate, &all_scales[chk]))
> + break;
> +
> +
> + if (scale_eq(candidate, &all_scales[chk]))
> + continue;
> +
> + memmove(&all_scales[chk + 2], &all_scales[chk],
> + (new_idx - chk) * sizeof(int));
> + all_scales[chk] = candidate[0];
> + all_scales[chk + 1] = candidate[1];
> + new_idx += 2;
> + }
> + }
> +
> + gts->num_avail_all_scales = new_idx / 2;
> + gts->avail_all_scales_table = all_scales;
> +
> + return 0;
> +}
> - /*
> - * We assume all the gains for same integration time were unique.
> - * It is likely the first time table had greatest time multiplier as
> - * the times are in the order of preference and greater times are
> - * usually preferred. Hence we start from the last table which is likely
> - * to have the smallest total gains.
> - */
ah. This is one of the comments I'd like to see up above.
> - time_idx = gts->num_itime - 1;
> - memcpy(all_gains, gains[time_idx], gain_bytes);
> - new_idx = gts->num_hwgain;
> +static void compute_per_time_gains(struct iio_gts *gts, int **gains)
> +{
> + int i, j;
>
Hi Jonathan,
Thanks for the comments!
On 15/12/2024 14:54, Jonathan Cameron wrote:
> On Mon, 9 Dec 2024 09:58:41 +0200
> Matti Vaittinen <mazziesaccount@gmail.com> wrote:
>
>> Make available scale building more clear. This hurts the performance
>> quite a bit by looping throgh the scales many times instead of doing
>> everything in one loop. It however simplifies logic by:
>> - decoupling the gain and scale allocations & computations
>> - keeping the temporary 'per_time_gains' table inside the
>> per_time_scales computation function.
>> - separating building the 'all scales' table in own function and doing
>> it based on the already computed per-time scales.
>>
>> Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
> Hi Matti,
>
> I'm definitely keen to see easier to follow code and agree that the
> cost doesn't matter (Within reason).
>
> I think a few more comments and rethinks of function names would
> make it clearer still. If each subfunction called has a clear
> statement of what it's inputs and outputs are that would help
> a lot as sort functions in particular tend to be tricky to figure out
> by eyeballing them.
I'll see if I can come up with something more descriptive while keeping
the names reasonably short.
>> ---
>> In my (not always) humble (enough) opinion:
>> - Building the available scales tables was confusing.
>> - The result of this patch looks much clearer and is simpler to follow.
>> - Handles memory allocations and freeing in somehow easyish to follow
>> manner while still:
>> - Avoids introducing mid-function variables
>> - Avoids mixing and matching 'scoped' allocs with regular ones
>>
>> I however send this as an RFC because it hurts the performance quite a
>> bit. (No measurements done, I doubt exact numbers matter. I'd just say
>> it more than doubles the time, prbably triples or quadruples). Well, it
>> is not really on a hot path though, tables are computed once at
>> start-up, and with a sane amount of gains/times this is likely not a
>> real problem.
>>
>> This has been tested only by running the kunit tests for the gts-helpers
>> in a beaglebone black. Further testing and eyeing is appreciated :)
>> ---
>> drivers/iio/industrialio-gts-helper.c | 250 +++++++++++++++-----------
>> 1 file changed, 149 insertions(+), 101 deletions(-)
>>
>> diff --git a/drivers/iio/industrialio-gts-helper.c b/drivers/iio/industrialio-gts-helper.c
>> index 291c0fc332c9..01206bc3e48e 100644
>> --- a/drivers/iio/industrialio-gts-helper.c
>> +++ b/drivers/iio/industrialio-gts-helper.c
>> @@ -160,16 +160,108 @@ static void iio_gts_purge_avail_scale_table(struct iio_gts *gts)
>> gts->num_avail_all_scales = 0;
>> }
>
>> +
>> +static int do_combined_scaletable(struct iio_gts *gts, size_t scale_bytes)
>
> Probably name this to indicate what it is doing to the combined scaletable.
Hmm. I think I understand what you mean. Still, I kind of think the
function name should reflect what the function does (creates the scale
table where all the scales are listed by combining all unique scales
from the per-time scale tables).
Maybe this could be accompanied by a comment which also explains what
how this is done.
> Maybe make it clear that scale_bytes is of the whole scale table (i think!)
> scale_table_bytes.
I like this idea :)
>
> A few comments might also be useful.
I agree. Especially if we keep the name reflecting the creation of the
"all scales" table :)
>> +{
>> + int t_idx, i, new_idx;
>> + int **scales = gts->per_time_avail_scale_tables;
>> + int *all_scales = kcalloc(gts->num_itime, scale_bytes, GFP_KERNEL);
>> +
>> + if (!all_scales)
>> + return -ENOMEM;
>> +
>> + t_idx = gts->num_itime - 1;
>> + memcpy(all_scales, scales[t_idx], scale_bytes);
>
> I'm not 100% sure what that is copying in, so maybe a comment.
> Is it just filling the final integration time with the unadjusted
> scale table? If so, maybe say why.
>
>> + new_idx = gts->num_hwgain * 2;
>
> Comment on where you are starting the index. One row into a matrix?
>
>> +
>> + while (t_idx-- > 0) {
>> + for (i = 0; i < gts->num_hwgain ; i++) {
>> + int *candidate = &scales[t_idx][i * 2];
>> + int chk;
>> +
>> + if (scale_smaller(candidate, &all_scales[new_idx - 2])) {
>> + all_scales[new_idx] = candidate[0];
>> + all_scales[new_idx + 1] = candidate[1];
>> + new_idx += 2;
>> +
>> + continue;
>> + }
>> + for (chk = 0; chk < new_idx; chk += 2)
>> + if (!scale_smaller(candidate, &all_scales[chk]))
>> + break;
>> +
>> +
>> + if (scale_eq(candidate, &all_scales[chk]))
>> + continue;
>> +
>> + memmove(&all_scales[chk + 2], &all_scales[chk],
>> + (new_idx - chk) * sizeof(int));
>> + all_scales[chk] = candidate[0];
>> + all_scales[chk + 1] = candidate[1];
>> + new_idx += 2;
>> + }
>> + }
>> +
>> + gts->num_avail_all_scales = new_idx / 2;
>> + gts->avail_all_scales_table = all_scales;
>> +
>> + return 0;
>> +}
>
>
>> - /*
>> - * We assume all the gains for same integration time were unique.
>> - * It is likely the first time table had greatest time multiplier as
>> - * the times are in the order of preference and greater times are
>> - * usually preferred. Hence we start from the last table which is likely
>> - * to have the smallest total gains.
>> - */
> ah. This is one of the comments I'd like to see up above.
Right! I'll re-add this comment to correct location :)
>
>> - time_idx = gts->num_itime - 1;
>> - memcpy(all_gains, gains[time_idx], gain_bytes);
>> - new_idx = gts->num_hwgain;
>> +static void compute_per_time_gains(struct iio_gts *gts, int **gains)
>> +{
>> + int i, j;
>>
Thanks a lot Jonathan! I feel your feedback helps to make this better :)
Yours,
-- Matti
© 2016 - 2025 Red Hat, Inc.