[PATCH] minmax: clamp more efficiently by avoiding extra comparison

Jason A. Donenfeld posted 1 patch 4 days, 17 hours ago
There is a newer version of this series
include/linux/minmax.h | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
[PATCH] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 4 days, 17 hours ago
Currently the clamp algorithm does:

	if (val > hi)
		val = hi;
	if (val < lo)
		val = lo;

But since hi > lo by definition, this can be made more efficient with:

	if (val > hi)
		val = hi;
	else if (val < lo)
		val = lo;

So fix up the clamp and clamp_t functions to do this, adding the same
argument checking as for min and min_t.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/linux/minmax.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/include/linux/minmax.h b/include/linux/minmax.h
index 5433c08fcc68..0153c9eba013 100644
--- a/include/linux/minmax.h
+++ b/include/linux/minmax.h
@@ -19,42 +19,58 @@
 #define __typecheck(x, y) \
 	(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
 
 #define __no_side_effects(x, y) \
 		(__is_constexpr(x) && __is_constexpr(y))
 
 #define __safe_cmp(x, y) \
 		(__typecheck(x, y) && __no_side_effects(x, y))
 
 #define __cmp(x, y, op)	((x) op (y) ? (x) : (y))
 
 #define __cmp_once(x, y, unique_x, unique_y, op) ({	\
 		typeof(x) unique_x = (x);		\
 		typeof(y) unique_y = (y);		\
 		__cmp(unique_x, unique_y, op); })
 
 #define __careful_cmp(x, y, op) \
 	__builtin_choose_expr(__safe_cmp(x, y), \
 		__cmp(x, y, op), \
 		__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
 
+#define __clamp(val, lo, hi) \
+	((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val)))
+
+#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({		\
+		typeof(val) unique_val = (val);					\
+		typeof(lo) unique_lo = (lo);					\
+		typeof(hi) unique_hi = (hi);					\
+		__clamp(unique_val, unique_lo, unique_hi); })
+
+#define __careful_clamp(val, lo, hi)						\
+	__builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) &&	\
+			      __is_constexpr(val) && 				\
+			      __is_constexpr(lo) && __is_constexpr(hi),		\
+		__clamp(val, lo, hi),						\
+		__clamp_once(val, lo, hi, __UNIQUE_ID(__val), __UNIQUE_ID(__lo), __UNIQUE_ID(__hi)))
+
 /**
  * min - return minimum of two values of the same or compatible types
  * @x: first value
  * @y: second value
  */
 #define min(x, y)	__careful_cmp(x, y, <)
 
 /**
  * max - return maximum of two values of the same or compatible types
  * @x: first value
  * @y: second value
  */
 #define max(x, y)	__careful_cmp(x, y, >)
 
 /**
  * min3 - return minimum of three values
  * @x: first value
  * @y: second value
  * @z: third value
  */
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
@@ -68,78 +84,78 @@
 #define max3(x, y, z) max((typeof(x))max(x, y), z)
 
 /**
  * min_not_zero - return the minimum that is _not_ zero, unless both are zero
  * @x: value1
  * @y: value2
  */
 #define min_not_zero(x, y) ({			\
 	typeof(x) __x = (x);			\
 	typeof(y) __y = (y);			\
 	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
 
 /**
  * clamp - return a value clamped to a given range with strict typechecking
  * @val: current value
  * @lo: lowest allowable value
  * @hi: highest allowable value
  *
  * This macro does strict typechecking of @lo/@hi to make sure they are of the
  * same type as @val.  See the unnecessary pointer comparisons.
  */
-#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+#define clamp(val, lo, hi) __careful_clamp(val, lo, hi)
 
 /*
  * ..and if you can't take the strict
  * types, you can specify one yourself.
  *
  * Or not use min/max/clamp at all, of course.
  */
 
 /**
  * min_t - return minimum of two values, using the specified type
  * @type: data type to use
  * @x: first value
  * @y: second value
  */
 #define min_t(type, x, y)	__careful_cmp((type)(x), (type)(y), <)
 
 /**
  * max_t - return maximum of two values, using the specified type
  * @type: data type to use
  * @x: first value
  * @y: second value
  */
 #define max_t(type, x, y)	__careful_cmp((type)(x), (type)(y), >)
 
 /**
  * clamp_t - return a value clamped to a given range using a given type
  * @type: the type of variable to use
  * @val: current value
  * @lo: minimum allowable value
  * @hi: maximum allowable value
  *
  * This macro does no typechecking and uses temporary variables of type
  * @type to make all the comparisons.
  */
-#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi))
 
 /**
  * clamp_val - return a value clamped to a given range using val's type
  * @val: current value
  * @lo: minimum allowable value
  * @hi: maximum allowable value
  *
  * This macro does no typechecking and uses temporary variables of whatever
  * type the input argument @val is.  This is useful when @val is an unsigned
  * type and @lo and @hi are literals that will otherwise be assigned a signed
  * integer type.
  */
 #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
 
 /**
  * swap - swap values of @a and @b
  * @a: first value
  * @b: second value
  */
 #define swap(a, b) \
 	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
-- 
2.37.3
Re: [PATCH] minmax: clamp more efficiently by avoiding extra comparison
Posted by Andy Shevchenko 4 days, 16 hours ago
On Fri, Sep 23, 2022 at 12:06:21PM +0200, Jason A. Donenfeld wrote:
> Currently the clamp algorithm does:
> 
> 	if (val > hi)
> 		val = hi;
> 	if (val < lo)
> 		val = lo;
> 
> But since hi > lo by definition, this can be made more efficient with:

It's strongly speaking, but we have to proof that, right?
So, while I haven't checked the code, this change should also
include (does it?) the corresponding compile-time checks (for
constant arguments) in similar way how it's done for GENMASK().

Otherwise I have no objections.

> 	if (val > hi)
> 		val = hi;
> 	else if (val < lo)
> 		val = lo;
> 
> So fix up the clamp and clamp_t functions to do this, adding the same
> argument checking as for min and min_t.

-- 
With Best Regards,
Andy Shevchenko
Re: [PATCH] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 4 days, 16 hours ago
Hi Andy,

On Fri, Sep 23, 2022 at 12:36 PM Andy Shevchenko
<andriy.shevchenko@linux.intel.com> wrote:
>
> On Fri, Sep 23, 2022 at 12:06:21PM +0200, Jason A. Donenfeld wrote:
> > Currently the clamp algorithm does:
> >
> >       if (val > hi)
> >               val = hi;
> >       if (val < lo)
> >               val = lo;
> >
> > But since hi > lo by definition, this can be made more efficient with:
>
> It's strongly speaking, but we have to proof that, right?
> So, while I haven't checked the code, this change should also
> include (does it?) the corresponding compile-time checks (for
> constant arguments) in similar way how it's done for GENMASK().
>
> Otherwise I have no objections.

I think most cases are with compile time constants, but some cases are
with variables. What should we do in that case? Checking variables at
runtime incurs the same cost as the old code. I guess we could do this
fast thing for constants and the slower old thing for non-constants?
Or not do either, keep this commit as is, and just accept that if you
pass bogus bounds to clamp, you're going to end up with something
weird, which is already the case now so not a big deal?

Jason
Re: [PATCH] minmax: clamp more efficiently by avoiding extra comparison
Posted by Andy Shevchenko 4 days, 12 hours ago
On Fri, Sep 23, 2022 at 12:40:47PM +0200, Jason A. Donenfeld wrote:
> On Fri, Sep 23, 2022 at 12:36 PM Andy Shevchenko
> <andriy.shevchenko@linux.intel.com> wrote:
> >
> > On Fri, Sep 23, 2022 at 12:06:21PM +0200, Jason A. Donenfeld wrote:
> > > Currently the clamp algorithm does:
> > >
> > >       if (val > hi)
> > >               val = hi;
> > >       if (val < lo)
> > >               val = lo;
> > >
> > > But since hi > lo by definition, this can be made more efficient with:
> >
> > It's strongly speaking, but we have to proof that, right?
> > So, while I haven't checked the code, this change should also
> > include (does it?) the corresponding compile-time checks (for
> > constant arguments) in similar way how it's done for GENMASK().
> >
> > Otherwise I have no objections.
> 
> I think most cases are with compile time constants, but some cases are
> with variables. What should we do in that case? Checking variables at
> runtime incurs the same cost as the old code. I guess we could do this
> fast thing for constants and the slower old thing for non-constants?
> Or not do either, keep this commit as is, and just accept that if you
> pass bogus bounds to clamp, you're going to end up with something
> weird, which is already the case now so not a big deal?

I'm talking only for the cases where we _can_ check. For variables it's
probably tricky to do at compile time if possible at all.

-- 
With Best Regards,
Andy Shevchenko
Re: [PATCH] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 4 days, 12 hours ago
Hi Andy,

On Fri, Sep 23, 2022 at 5:11 PM Andy Shevchenko
<andriy.shevchenko@linux.intel.com> wrote:
>
> On Fri, Sep 23, 2022 at 12:40:47PM +0200, Jason A. Donenfeld wrote:
> > On Fri, Sep 23, 2022 at 12:36 PM Andy Shevchenko
> > <andriy.shevchenko@linux.intel.com> wrote:
> > >
> > > On Fri, Sep 23, 2022 at 12:06:21PM +0200, Jason A. Donenfeld wrote:
> > > > Currently the clamp algorithm does:
> > > >
> > > >       if (val > hi)
> > > >               val = hi;
> > > >       if (val < lo)
> > > >               val = lo;
> > > >
> > > > But since hi > lo by definition, this can be made more efficient with:
> > >
> > > It's strongly speaking, but we have to proof that, right?
> > > So, while I haven't checked the code, this change should also
> > > include (does it?) the corresponding compile-time checks (for
> > > constant arguments) in similar way how it's done for GENMASK().
> > >
> > > Otherwise I have no objections.
> >
> > I think most cases are with compile time constants, but some cases are
> > with variables. What should we do in that case? Checking variables at
> > runtime incurs the same cost as the old code. I guess we could do this
> > fast thing for constants and the slower old thing for non-constants?
> > Or not do either, keep this commit as is, and just accept that if you
> > pass bogus bounds to clamp, you're going to end up with something
> > weird, which is already the case now so not a big deal?
>
> I'm talking only for the cases where we _can_ check. For variables it's
> probably tricky to do at compile time if possible at all.

Okay, sure, I'll add a check in the case where we can check.

Jason
Re: [PATCH] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 4 days, 16 hours ago
Hey again,

On Fri, Sep 23, 2022 at 12:40 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>
> Hi Andy,
>
> On Fri, Sep 23, 2022 at 12:36 PM Andy Shevchenko
> <andriy.shevchenko@linux.intel.com> wrote:
> >
> > On Fri, Sep 23, 2022 at 12:06:21PM +0200, Jason A. Donenfeld wrote:
> > > Currently the clamp algorithm does:
> > >
> > >       if (val > hi)
> > >               val = hi;
> > >       if (val < lo)
> > >               val = lo;
> > >
> > > But since hi > lo by definition, this can be made more efficient with:
> >
> > It's strongly speaking, but we have to proof that, right?
> > So, while I haven't checked the code, this change should also
> > include (does it?) the corresponding compile-time checks (for
> > constant arguments) in similar way how it's done for GENMASK().
> >
> > Otherwise I have no objections.
>
> I think most cases are with compile time constants, but some cases are
> with variables. What should we do in that case? Checking variables at
> runtime incurs the same cost as the old code. I guess we could do this
> fast thing for constants and the slower old thing for non-constants?
> Or not do either, keep this commit as is, and just accept that if you
> pass bogus bounds to clamp, you're going to end up with something
> weird, which is already the case now so not a big deal?

Actually, yea, I think we should keep this commit as-is and not add
additional checking becauseeeee not only is hi>lo by definition, but
both for the old code and for the new code, the result of lo>hi is
total nonsense:

Assuming hi > lo, these snippets all yield the same result:

        if (val > hi)
                val = hi;
        if (val < lo)
                val = lo;

        if (val > hi)
                val = hi;
        else if (val < lo)
                val = lo;

        if (val < lo)
                val = lo;
        if (val > hi)
                val = hi;

        if (val < lo)
                val = lo;
        else if (val > hi)
                val = hi;

Assuming lo > hi, and the first condition triggers, these snippets all
yield different results, all of which are undefined nonsense:

        if (val > hi)
                val = hi;
        if (val < lo)
                val = lo;
--> val is lo

        if (val > hi)
                val = hi;
        else if (val < lo)
                val = lo;
--> val is hi

        if (val < lo)
                val = lo;
        if (val > hi)
                val = hi;
--> val is hi

        if (val < lo)
                val = lo;
        else if (val > hi)
                val = hi;
--> val is lo

Jason
Re: [PATCH] minmax: clamp more efficiently by avoiding extra comparison
Posted by Andy Shevchenko 4 days, 12 hours ago
On Fri, Sep 23, 2022 at 12:48:48PM +0200, Jason A. Donenfeld wrote:
> On Fri, Sep 23, 2022 at 12:40 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:

...

> both for the old code and for the new code, the result of lo>hi is
> total nonsense:

Exactly and my point is to add at least compile-time check for constants.

-- 
With Best Regards,
Andy Shevchenko
Re: [PATCH] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 4 days, 12 hours ago
On Fri, Sep 23, 2022 at 5:12 PM Andy Shevchenko
<andriy.shevchenko@linux.intel.com> wrote:
>
> On Fri, Sep 23, 2022 at 12:48:48PM +0200, Jason A. Donenfeld wrote:
> > On Fri, Sep 23, 2022 at 12:40 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>
> ...
>
> > both for the old code and for the new code, the result of lo>hi is
> > total nonsense:
>
> Exactly and my point is to add at least compile-time check for constants.

Cool okay. Rolling v2 now.

Jason
[PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 4 days, 11 hours ago
Currently the clamp algorithm does:

	if (val > hi)
		val = hi;
	if (val < lo)
		val = lo;

But since hi > lo by definition, this can be made more efficient with:

	if (val > hi)
		val = hi;
	else if (val < lo)
		val = lo;

So fix up the clamp and clamp_t functions to do this, adding the same
argument checking as for min and min_t.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/linux/minmax.h | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/include/linux/minmax.h b/include/linux/minmax.h
index 5433c08fcc68..30e2e2cd0f44 100644
--- a/include/linux/minmax.h
+++ b/include/linux/minmax.h
@@ -37,6 +37,27 @@
 		__cmp(x, y, op), \
 		__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
 
+#define __clamp(val, lo, hi)							\
+	((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val)))
+
+#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({		\
+		typeof(val) unique_val = (val);					\
+		typeof(lo) unique_lo = (lo);					\
+		typeof(hi) unique_hi = (hi);					\
+		__clamp(unique_val, unique_lo, unique_hi); })
+
+#define __clamp_input_check(lo, hi)						\
+        (BUILD_BUG_ON_ZERO(__builtin_choose_expr(				\
+                __is_constexpr((lo) > (hi)), (lo) > (hi), false)))
+
+#define __careful_clamp(val, lo, hi) ({						\
+	__clamp_input_check(lo, hi) + 						\
+	__builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) &&	\
+			      __typecheck(hi, lo) && __is_constexpr(val) && 	\
+			      __is_constexpr(lo) && __is_constexpr(hi),		\
+		__clamp(val, lo, hi),						\
+		__clamp_once(val, lo, hi, __UNIQUE_ID(__val), __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
+
 /**
  * min - return minimum of two values of the same or compatible types
  * @x: first value
@@ -86,7 +107,7 @@
  * This macro does strict typechecking of @lo/@hi to make sure they are of the
  * same type as @val.  See the unnecessary pointer comparisons.
  */
-#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+#define clamp(val, lo, hi) __careful_clamp(val, lo, hi)
 
 /*
  * ..and if you can't take the strict
@@ -121,7 +142,7 @@
  * This macro does no typechecking and uses temporary variables of type
  * @type to make all the comparisons.
  */
-#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi))
 
 /**
  * clamp_val - return a value clamped to a given range using val's type
-- 
2.37.3
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Andrew Morton 4 days, 4 hours ago
On Fri, 23 Sep 2022 17:40:01 +0200 "Jason A. Donenfeld" <Jason@zx2c4.com> wrote:

> Currently the clamp algorithm does:
> 
> 	if (val > hi)
> 		val = hi;
> 	if (val < lo)
> 		val = lo;
> 
> But since hi > lo by definition, this can be made more efficient with:
> 
> 	if (val > hi)
> 		val = hi;
> 	else if (val < lo)
> 		val = lo;
> 
> So fix up the clamp and clamp_t functions to do this, adding the same
> argument checking as for min and min_t.
> 

The patch adds 140 bytes of text to mm/memblock.o, for example. 
Presumably from the additional branch.  Larger text means larger cache
footprint means slower.

So where's the proof that this change gives us a more efficient kernel?
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 3 days, 16 hours ago
Hi Andrew,

On Fri, Sep 23, 2022 at 03:54:12PM -0700, Andrew Morton wrote:
> On Fri, 23 Sep 2022 17:40:01 +0200 "Jason A. Donenfeld" <Jason@zx2c4.com> wrote:
> 
> > Currently the clamp algorithm does:
> > 
> > 	if (val > hi)
> > 		val = hi;
> > 	if (val < lo)
> > 		val = lo;
> > 
> > But since hi > lo by definition, this can be made more efficient with:
> > 
> > 	if (val > hi)
> > 		val = hi;
> > 	else if (val < lo)
> > 		val = lo;
> > 
> > So fix up the clamp and clamp_t functions to do this, adding the same
> > argument checking as for min and min_t.
> > 
> 
> The patch adds 140 bytes of text to mm/memblock.o, for example. 
> Presumably from the additional branch.  Larger text means larger cache
> footprint means slower.
> 
> So where's the proof that this change gives us a more efficient kernel?

For x86, code generation ought to be the same, because the compiler can
still generate cmovs for all:

unsigned int clamp1(unsigned int val, unsigned int lo, unsigned int hi)
{
    if (val >= hi)
        val = hi;
    if (val <= lo)
        val = lo;
    return val;
}

unsigned int clamp2(unsigned int val, unsigned int lo, unsigned int hi)
{
    if (val >= hi)
        val = hi;
    else if (val <= lo)
        val = lo;
    return val;
}

On x86_64 this is:

clamp1:
        cmp     edi, edx
        mov     eax, esi
        cmova   edi, edx
        cmp     edi, esi
        cmovnb  eax, edi
        ret
clamp2:
        cmp     edi, esi
        mov     eax, edx
        cmovnb  esi, edi
        cmp     edi, edx
        cmovb   eax, esi
        ret

The latter one clever compares hi and lo first. I observe the same when
hi and lo are constants instead. So no change.

On ARM64 the same thing happens:

clamp1:
        cmp     w0, w2
        csel    w8, w0, w2, lo
        cmp     w8, w1
        csel    w0, w8, w1, hi
        ret
clamp2:
        cmp     w0, w1
        csel    w8, w0, w1, hi
        cmp     w0, w2
        csel    w0, w8, w2, lo
        ret

On MIPS64, on the other hand, we save some arithmetic and the number of
branches remains the same:

clamp1:
        sltu    $3,$6,$4
        bne     $3,$0,.L2
        move    $2,$6

        move    $2,$4
.L2:
        sltu    $3,$2,$5
        bnel    $3,$0,.L7
        move    $2,$5

.L7:
        jr      $31
        nop

clamp2:
        sltu    $3,$4,$6
        beq     $3,$0,.L13
        move    $2,$6

        sltu    $3,$4,$5
        bne     $3,$0,.L12
        move    $2,$4

.L13:
        jr      $31
        nop

.L12:
        jr      $31
        move    $2,$5

So it seems like, at least in isolation, this is only a win?

It's possible that when inlined into a more complex function that the
various cases are optimized together and a branch is introduced if the
compiler thinks its better; but alone I'm not seeing that happen.

Or maybe older compilers do something worse? On x86_64, clang doesn't do
the smart thing until clang 13 and gcc not until gcc 11. So maybe your
text size blew up because your compiler is old?

Either way, I agree that text size increase is not a good idea, and we
should avoid that if we can.

Worth noting, by the way, is that the input validation check already
caught a bug when 0day test bot choked:

https://lore.kernel.org/linux-hwmon/20220924101151.4168414-1-Jason@zx2c4.com/

So, options:
1) Keep this patch as-is, because it is useful on modern compilers.
2) Add an ifdef on compiler version, so we generate the best code in
   each case.
2) Go back to testing twice, but keep the checker macro because it's
   apparently useful.
4) Do nothing and discard this series.

Any of those are okay with me. Opinions?

Jason
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Andy Shevchenko 1 day, 17 hours ago
On Sat, Sep 24, 2022 at 12:37:26PM +0200, Jason A. Donenfeld wrote:
> On Fri, Sep 23, 2022 at 03:54:12PM -0700, Andrew Morton wrote:
> > On Fri, 23 Sep 2022 17:40:01 +0200 "Jason A. Donenfeld" <Jason@zx2c4.com> wrote:

...

> Worth noting, by the way, is that the input validation check already
> caught a bug when 0day test bot choked:
> 
> https://lore.kernel.org/linux-hwmon/20220924101151.4168414-1-Jason@zx2c4.com/

Hooray, it was a good idea! :-)

> So, options:
> 1) Keep this patch as-is, because it is useful on modern compilers.
> 2) Add an ifdef on compiler version, so we generate the best code in
>    each case.
> 3) Go back to testing twice, but keep the checker macro because it's
>    apparently useful.
> 4) Do nothing and discard this series.
> 
> Any of those are okay with me. Opinions?

I tend to case 3) (I believe you typo'ed double 2) cases) and apply the rest
as a separate change with all downsides explained (kinda 1) approach).

-- 
With Best Regards,
Andy Shevchenko
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 1 day, 14 hours ago
On Mon, Sep 26, 2022 at 12:00 PM Andy Shevchenko
<andriy.shevchenko@linux.intel.com> wrote:
>
> On Sat, Sep 24, 2022 at 12:37:26PM +0200, Jason A. Donenfeld wrote:
> > On Fri, Sep 23, 2022 at 03:54:12PM -0700, Andrew Morton wrote:
> > > On Fri, 23 Sep 2022 17:40:01 +0200 "Jason A. Donenfeld" <Jason@zx2c4.com> wrote:
>
> ...
>
> > Worth noting, by the way, is that the input validation check already
> > caught a bug when 0day test bot choked:
> >
> > https://lore.kernel.org/linux-hwmon/20220924101151.4168414-1-Jason@zx2c4.com/
>
> Hooray, it was a good idea! :-)
>
> > So, options:
> > 1) Keep this patch as-is, because it is useful on modern compilers.
> > 2) Add an ifdef on compiler version, so we generate the best code in
> >    each case.
> > 3) Go back to testing twice, but keep the checker macro because it's
> >    apparently useful.
> > 4) Do nothing and discard this series.
> >
> > Any of those are okay with me. Opinions?
>
> I tend to case 3) (I believe you typo'ed double 2) cases) and apply the rest
> as a separate change with all downsides explained (kinda 1) approach).

Alright, I'll do that. v3 on its way, then.
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Kees Cook 1 day, 8 hours ago
On Mon, Sep 26, 2022 at 02:23:48PM +0200, Jason A. Donenfeld wrote:
> On Mon, Sep 26, 2022 at 12:00 PM Andy Shevchenko
> <andriy.shevchenko@linux.intel.com> wrote:
> >
> > On Sat, Sep 24, 2022 at 12:37:26PM +0200, Jason A. Donenfeld wrote:
> > > On Fri, Sep 23, 2022 at 03:54:12PM -0700, Andrew Morton wrote:
> > > > On Fri, 23 Sep 2022 17:40:01 +0200 "Jason A. Donenfeld" <Jason@zx2c4.com> wrote:
> >
> > ...
> >
> > > Worth noting, by the way, is that the input validation check already
> > > caught a bug when 0day test bot choked:
> > >
> > > https://lore.kernel.org/linux-hwmon/20220924101151.4168414-1-Jason@zx2c4.com/
> >
> > Hooray, it was a good idea! :-)
> >
> > > So, options:
> > > 1) Keep this patch as-is, because it is useful on modern compilers.
> > > 2) Add an ifdef on compiler version, so we generate the best code in
> > >    each case.
> > > 3) Go back to testing twice, but keep the checker macro because it's
> > >    apparently useful.
> > > 4) Do nothing and discard this series.
> > >
> > > Any of those are okay with me. Opinions?
> >
> > I tend to case 3) (I believe you typo'ed double 2) cases) and apply the rest
> > as a separate change with all downsides explained (kinda 1) approach).
> 
> Alright, I'll do that. v3 on its way, then.

Cool. I've dropped v2 from my -next tree.

-- 
Kees Cook
[PATCH v3 1/2] minmax: sanity check constant bounds when clamping
Posted by Jason A. Donenfeld 1 day, 13 hours ago
The clamp family of functions only makes sense if hi>=lo. If hi and lo
are compile-time constants, then raise a build error. Doing so has
already caught buggy code. This also introduces the infrastructure to
improve the clamping function in subsequent commits.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/linux/minmax.h | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/include/linux/minmax.h b/include/linux/minmax.h
index 5433c08fcc68..293a66ad7830 100644
--- a/include/linux/minmax.h
+++ b/include/linux/minmax.h
@@ -37,6 +37,28 @@
 		__cmp(x, y, op), \
 		__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
 
+#define __clamp(val, lo, hi)							\
+	__cmp(__cmp(val, lo, >), hi, <)
+
+#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({		\
+		typeof(val) unique_val = (val);					\
+		typeof(lo) unique_lo = (lo);					\
+		typeof(hi) unique_hi = (hi);					\
+		__clamp(unique_val, unique_lo, unique_hi); })
+
+#define __clamp_input_check(lo, hi)						\
+        (BUILD_BUG_ON_ZERO(__builtin_choose_expr(				\
+                __is_constexpr((lo) > (hi)), (lo) > (hi), false)))
+
+#define __careful_clamp(val, lo, hi) ({						\
+	__clamp_input_check(lo, hi) + 						\
+	__builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) &&	\
+			      __typecheck(hi, lo) && __is_constexpr(val) && 	\
+			      __is_constexpr(lo) && __is_constexpr(hi),		\
+		__clamp(val, lo, hi),						\
+		__clamp_once(val, lo, hi, __UNIQUE_ID(__val),			\
+			     __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
+
 /**
  * min - return minimum of two values of the same or compatible types
  * @x: first value
@@ -86,7 +108,7 @@
  * This macro does strict typechecking of @lo/@hi to make sure they are of the
  * same type as @val.  See the unnecessary pointer comparisons.
  */
-#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+#define clamp(val, lo, hi) __careful_clamp(val, lo, hi)
 
 /*
  * ..and if you can't take the strict
@@ -121,7 +143,7 @@
  * This macro does no typechecking and uses temporary variables of type
  * @type to make all the comparisons.
  */
-#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi))
 
 /**
  * clamp_val - return a value clamped to a given range using val's type
-- 
2.37.3
Re: [PATCH v3 1/2] minmax: sanity check constant bounds when clamping
Posted by Kees Cook 1 day, 8 hours ago
On Mon, Sep 26, 2022 at 03:34:34PM +0200, Jason A. Donenfeld wrote:
> The clamp family of functions only makes sense if hi>=lo. If hi and lo
> are compile-time constants, then raise a build error. Doing so has
> already caught buggy code. This also introduces the infrastructure to
> improve the clamping function in subsequent commits.
> 
> Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Kees Cook <keescook@chromium.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>

Reviewed-by: Kees Cook <keescook@chromium.org>

-- 
Kees Cook
Re: [PATCH v3 1/2] minmax: sanity check constant bounds when clamping
Posted by Andy Shevchenko 1 day, 13 hours ago
On Mon, Sep 26, 2022 at 03:34:34PM +0200, Jason A. Donenfeld wrote:
> The clamp family of functions only makes sense if hi>=lo. If hi and lo
> are compile-time constants, then raise a build error. Doing so has
> already caught buggy code. This also introduces the infrastructure to
> improve the clamping function in subsequent commits.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Thanks!

> Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Kees Cook <keescook@chromium.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
>  include/linux/minmax.h | 26 ++++++++++++++++++++++++--
>  1 file changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/minmax.h b/include/linux/minmax.h
> index 5433c08fcc68..293a66ad7830 100644
> --- a/include/linux/minmax.h
> +++ b/include/linux/minmax.h
> @@ -37,6 +37,28 @@
>  		__cmp(x, y, op), \
>  		__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
>  
> +#define __clamp(val, lo, hi)							\
> +	__cmp(__cmp(val, lo, >), hi, <)
> +
> +#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({		\
> +		typeof(val) unique_val = (val);					\
> +		typeof(lo) unique_lo = (lo);					\
> +		typeof(hi) unique_hi = (hi);					\
> +		__clamp(unique_val, unique_lo, unique_hi); })
> +
> +#define __clamp_input_check(lo, hi)						\
> +        (BUILD_BUG_ON_ZERO(__builtin_choose_expr(				\
> +                __is_constexpr((lo) > (hi)), (lo) > (hi), false)))
> +
> +#define __careful_clamp(val, lo, hi) ({						\
> +	__clamp_input_check(lo, hi) + 						\
> +	__builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) &&	\
> +			      __typecheck(hi, lo) && __is_constexpr(val) && 	\
> +			      __is_constexpr(lo) && __is_constexpr(hi),		\
> +		__clamp(val, lo, hi),						\
> +		__clamp_once(val, lo, hi, __UNIQUE_ID(__val),			\
> +			     __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
> +
>  /**
>   * min - return minimum of two values of the same or compatible types
>   * @x: first value
> @@ -86,7 +108,7 @@
>   * This macro does strict typechecking of @lo/@hi to make sure they are of the
>   * same type as @val.  See the unnecessary pointer comparisons.
>   */
> -#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
> +#define clamp(val, lo, hi) __careful_clamp(val, lo, hi)
>  
>  /*
>   * ..and if you can't take the strict
> @@ -121,7 +143,7 @@
>   * This macro does no typechecking and uses temporary variables of type
>   * @type to make all the comparisons.
>   */
> -#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
> +#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi))
>  
>  /**
>   * clamp_val - return a value clamped to a given range using val's type
> -- 
> 2.37.3
> 

-- 
With Best Regards,
Andy Shevchenko
[PATCH v3 2/2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 1 day, 13 hours ago
Currently the clamp algorithm does:

    if (val > hi)
        val = hi;
    if (val < lo)
        val = lo;

But since hi > lo by definition, this can be made more efficient with:

    if (val > hi)
        val = hi;
    else if (val < lo)
        val = lo;

So fix up the clamp and clamp_t functions to do this, adding the same
argument checking as for min and min_t.

For simple cases, code generation on x86_64 and aarch64 stay about the
same:

    before:
            cmp     edi, edx
            mov     eax, esi
            cmova   edi, edx
            cmp     edi, esi
            cmovnb  eax, edi
            ret
    after:
            cmp     edi, esi
            mov     eax, edx
            cmovnb  esi, edi
            cmp     edi, edx
            cmovb   eax, esi
            ret

    before:
            cmp     w0, w2
            csel    w8, w0, w2, lo
            cmp     w8, w1
            csel    w0, w8, w1, hi
            ret
    after:
            cmp     w0, w1
            csel    w8, w0, w1, hi
            cmp     w0, w2
            csel    w0, w8, w2, lo
            ret

On MIPS64, however, code generation improves, by removing arithmetic in
the second branch:

    before:
            sltu    $3,$6,$4
            bne     $3,$0,.L2
            move    $2,$6

            move    $2,$4
    .L2:
            sltu    $3,$2,$5
            bnel    $3,$0,.L7
            move    $2,$5

    .L7:
            jr      $31
            nop
    after:
            sltu    $3,$4,$6
            beq     $3,$0,.L13
            move    $2,$6

            sltu    $3,$4,$5
            bne     $3,$0,.L12
            move    $2,$4

    .L13:
            jr      $31
            nop

    .L12:
            jr      $31
            move    $2,$5

For more complex cases with surrounding code, the effects are a bit
more complicated. For example, consider this simplified version of
timestamp_truncate() from fs/inode.c on x86_64:

    struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
    {
        struct super_block *sb = inode->i_sb;
        unsigned int gran = sb->s_time_gran;

        t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
        if (t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min)
            t.tv_nsec = 0;
        return t;
    }

    before:
            mov     r8, rdx
            mov     rdx, rsi
            mov     rcx, QWORD PTR [r8]
            mov     rax, QWORD PTR [rcx+8]
            mov     rcx, QWORD PTR [rcx+16]
            cmp     rax, rdi
            mov     r8, rcx
            cmovge  rdi, rax
            cmp     rdi, rcx
            cmovle  r8, rdi
            cmp     rax, r8
            je      .L4
            cmp     rdi, rcx
            jge     .L4
            mov     rax, r8
            ret
    .L4:
            xor     edx, edx
            mov     rax, r8
            ret

    after:
            mov     rax, QWORD PTR [rdx]
            mov     rdx, QWORD PTR [rax+8]
            mov     rax, QWORD PTR [rax+16]
            cmp     rax, rdi
            jg      .L6
            mov     r8, rax
            xor     edx, edx
    .L2:
            mov     rax, r8
            ret
    .L6:
            cmp     rdx, rdi
            mov     r8, rdi
            cmovge  r8, rdx
            cmp     rax, r8
            je      .L4
            xor     eax, eax
            cmp     rdx, rdi
            cmovl   rax, rsi
            mov     rdx, rax
            mov     rax, r8
            ret
    .L4:
            xor     edx, edx
            jmp     .L2

In this case, we actually gain a branch, unfortunately, because the
compiler's replacement axioms no longer as cleanly apply.

So all and all, this change is a bit of a mixed bag.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/linux/minmax.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/minmax.h b/include/linux/minmax.h
index 293a66ad7830..9e7cc5319676 100644
--- a/include/linux/minmax.h
+++ b/include/linux/minmax.h
@@ -38,7 +38,7 @@
 		__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
 
 #define __clamp(val, lo, hi)							\
-	__cmp(__cmp(val, lo, >), hi, <)
+	((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val)))
 
 #define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({		\
 		typeof(val) unique_val = (val);					\
-- 
2.37.3
Re: [PATCH v3 2/2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Kees Cook 1 day, 8 hours ago
On Mon, Sep 26, 2022 at 03:34:35PM +0200, Jason A. Donenfeld wrote:
> [...]
> In this case, we actually gain a branch, unfortunately, because the
> compiler's replacement axioms no longer as cleanly apply.
> 
> So all and all, this change is a bit of a mixed bag.

I'm on the fence -- I think the new macro is a more correct way to
describe the operation, though on the other hand, the old way provides a
simple way to compose the bounds checks.

I suspect we should probably optimize for _performance_, not code size,
so if the new branch is actually visible via cycle counts in "perf"
output, probably we shouldn't use this patch, and instead add a comment
about why it is defined the way it is.

-Kees

-- 
Kees Cook
Re: [PATCH v3 2/2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 1 day, 5 hours ago
On Mon, Sep 26, 2022 at 8:30 PM Kees Cook <keescook@chromium.org> wrote:
>
> On Mon, Sep 26, 2022 at 03:34:35PM +0200, Jason A. Donenfeld wrote:
> > [...]
> > In this case, we actually gain a branch, unfortunately, because the
> > compiler's replacement axioms no longer as cleanly apply.
> >
> > So all and all, this change is a bit of a mixed bag.
>
> I'm on the fence -- I think the new macro is a more correct way to
> describe the operation, though on the other hand, the old way provides a
> simple way to compose the bounds checks.
>
> I suspect we should probably optimize for _performance_, not code size,
> so if the new branch is actually visible via cycle counts in "perf"
> output, probably we shouldn't use this patch, and instead add a comment
> about why it is defined the way it is.

I *want* the better algorithm to yield better performance, because
that's a much less confusing world. But it seems like we have grounds
for suspecting that might not be the case. So until I come up with
some real measurements, I agree we should hold off on this 2/2.

Jason
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Andrew Morton 2 days, 10 hours ago
On Sat, 24 Sep 2022 12:37:26 +0200 "Jason A. Donenfeld" <Jason@zx2c4.com> wrote:

> Or maybe older compilers do something worse? On x86_64, clang doesn't do
> the smart thing until clang 13 and gcc not until gcc 11. So maybe your
> text size blew up because your compiler is old?

I used gcc-11.1.0.
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Kees Cook 4 days, 3 hours ago
On Fri, Sep 23, 2022 at 03:54:12PM -0700, Andrew Morton wrote:
> On Fri, 23 Sep 2022 17:40:01 +0200 "Jason A. Donenfeld" <Jason@zx2c4.com> wrote:
> 
> > Currently the clamp algorithm does:
> > 
> > 	if (val > hi)
> > 		val = hi;
> > 	if (val < lo)
> > 		val = lo;
> > 
> > But since hi > lo by definition, this can be made more efficient with:
> > 
> > 	if (val > hi)
> > 		val = hi;
> > 	else if (val < lo)
> > 		val = lo;
> > 
> > So fix up the clamp and clamp_t functions to do this, adding the same
> > argument checking as for min and min_t.
> > 
> 
> The patch adds 140 bytes of text to mm/memblock.o, for example. 
> Presumably from the additional branch.  Larger text means larger cache
> footprint means slower.

Oh, interesting. I had spot-checked one clamp-using function (update_cfs_group)
and it produced the same output just with some register swapping and other
ordering changes. Hmm.

But yes, text is bigger, but bss is smaller. This are my allmodconfig builds:

   text        data         bss          dec       hex  filename
43779952    59510881    28684428    131975261   7ddc85d vmlinux.before
43781295    59510889    28676236    131968420   7ddada4 vmlinux

> So where's the proof that this change gives us a more efficient kernel?

A reasonable question. :)

-- 
Kees Cook
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Kees Cook 4 days, 7 hours ago
On Fri, 23 Sep 2022 17:40:01 +0200, Jason A. Donenfeld wrote:
> Currently the clamp algorithm does:
> 
> 	if (val > hi)
> 		val = hi;
> 	if (val < lo)
> 		val = lo;
> 
> [...]

Applied (with the little changes mentioned in v2 thread) to
for-next/hardening, thanks!

[1/1] minmax: clamp more efficiently by avoiding extra comparison
      https://git.kernel.org/kees/c/97dd4f05a2ce

-- 
Kees Cook
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Kees Cook 4 days, 10 hours ago
On Fri, Sep 23, 2022 at 05:40:01PM +0200, Jason A. Donenfeld wrote:
> Currently the clamp algorithm does:
> 
> 	if (val > hi)
> 		val = hi;
> 	if (val < lo)
> 		val = lo;
> 
> But since hi > lo by definition, this can be made more efficient with:
> 
> 	if (val > hi)
> 		val = hi;
> 	else if (val < lo)
> 		val = lo;
> 
> So fix up the clamp and clamp_t functions to do this, adding the same
> argument checking as for min and min_t.
> 
> Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Kees Cook <keescook@chromium.org>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
>  include/linux/minmax.h | 25 +++++++++++++++++++++++--
>  1 file changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/minmax.h b/include/linux/minmax.h
> index 5433c08fcc68..30e2e2cd0f44 100644
> --- a/include/linux/minmax.h
> +++ b/include/linux/minmax.h
> @@ -37,6 +37,27 @@
>  		__cmp(x, y, op), \
>  		__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
>  
> +#define __clamp(val, lo, hi)							\
> +	((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val)))
> +
> +#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({		\
> +		typeof(val) unique_val = (val);					\
> +		typeof(lo) unique_lo = (lo);					\
> +		typeof(hi) unique_hi = (hi);					\
> +		__clamp(unique_val, unique_lo, unique_hi); })
> +
> +#define __clamp_input_check(lo, hi)						\
> +        (BUILD_BUG_ON_ZERO(__builtin_choose_expr(				\
> +                __is_constexpr((lo) > (hi)), (lo) > (hi), false)))

Nice. :)

> +
> +#define __careful_clamp(val, lo, hi) ({						\
> +	__clamp_input_check(lo, hi) + 						\
> +	__builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) &&	\
> +			      __typecheck(hi, lo) && __is_constexpr(val) && 	\
> +			      __is_constexpr(lo) && __is_constexpr(hi),		\

I really like it! I might have used:

	__safe_cmp(val, lo) && __safe_cmp(val, hi)

instead of the "open coded" __typecheck()s and __is_constexpr()s, but
it's the same result.

> +		__clamp(val, lo, hi),						\
> +		__clamp_once(val, lo, hi, __UNIQUE_ID(__val), __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })

*complaint about line being >100 characters, but I don't really care* If
anyone is really bothered, this looks fine, too:

		__clamp_once(val, lo, hi,					\
			     __UNIQUE_ID(__val), __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })

*shrug*

> +
>  /**
>   * min - return minimum of two values of the same or compatible types
>   * @x: first value
> @@ -86,7 +107,7 @@
>   * This macro does strict typechecking of @lo/@hi to make sure they are of the
>   * same type as @val.  See the unnecessary pointer comparisons.
>   */
> -#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
> +#define clamp(val, lo, hi) __careful_clamp(val, lo, hi)
>  
>  /*
>   * ..and if you can't take the strict
> @@ -121,7 +142,7 @@
>   * This macro does no typechecking and uses temporary variables of type
>   * @type to make all the comparisons.
>   */
> -#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
> +#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi))
>  
>  /**
>   * clamp_val - return a value clamped to a given range using val's type
> -- 
> 2.37.3
> 

Reviewed-by: Kees Cook <keescook@chromium.org>

I can take this unless akpm wants it?

-- 
Kees Cook
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Andy Shevchenko 4 days, 10 hours ago
On Fri, Sep 23, 2022 at 09:41:19AM -0700, Kees Cook wrote:
> On Fri, Sep 23, 2022 at 05:40:01PM +0200, Jason A. Donenfeld wrote:

...

> > +		__clamp(val, lo, hi),						\
> > +		__clamp_once(val, lo, hi, __UNIQUE_ID(__val), __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
> 
> *complaint about line being >100 characters, but I don't really care* If
> anyone is really bothered, this looks fine, too:
> 
> 		__clamp_once(val, lo, hi,					\
> 			     __UNIQUE_ID(__val), __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })

Actually }) should occupy a separate line and it would be nice to have it for ({.

-- 
With Best Regards,
Andy Shevchenko
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 4 days, 10 hours ago
On Fri, Sep 23, 2022 at 6:53 PM Andy Shevchenko
<andriy.shevchenko@linux.intel.com> wrote:
>
> On Fri, Sep 23, 2022 at 09:41:19AM -0700, Kees Cook wrote:
> > On Fri, Sep 23, 2022 at 05:40:01PM +0200, Jason A. Donenfeld wrote:
>
> ...
>
> > > +           __clamp(val, lo, hi),                                           \
> > > +           __clamp_once(val, lo, hi, __UNIQUE_ID(__val), __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
> >
> > *complaint about line being >100 characters, but I don't really care* If
> > anyone is really bothered, this looks fine, too:
> >
> >               __clamp_once(val, lo, hi,                                       \
> >                            __UNIQUE_ID(__val), __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
>
> Actually }) should occupy a separate line and it would be nice to have it for ({.

That's what I would have done, except another macro in there didn't do
it like that, so I tried to copy the existing form.

Kees - do you want to touch up stylistic things as you see fit upon
commit, or do you want a v3 from me?

Jason
Re: [PATCH v2] minmax: clamp more efficiently by avoiding extra comparison
Posted by Jason A. Donenfeld 4 days, 10 hours ago
On Fri, Sep 23, 2022 at 6:41 PM Kees Cook <keescook@chromium.org> wrote:
>
> On Fri, Sep 23, 2022 at 05:40:01PM +0200, Jason A. Donenfeld wrote:
> > Currently the clamp algorithm does:
> >
> >       if (val > hi)
> >               val = hi;
> >       if (val < lo)
> >               val = lo;
> >
> > But since hi > lo by definition, this can be made more efficient with:
> >
> >       if (val > hi)
> >               val = hi;
> >       else if (val < lo)
> >               val = lo;
> >
> > So fix up the clamp and clamp_t functions to do this, adding the same
> > argument checking as for min and min_t.
> >
> > Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> > Cc: Andrew Morton <akpm@linux-foundation.org>
> > Cc: Kees Cook <keescook@chromium.org>
> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> > ---
> >  include/linux/minmax.h | 25 +++++++++++++++++++++++--
> >  1 file changed, 23 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/minmax.h b/include/linux/minmax.h
> > index 5433c08fcc68..30e2e2cd0f44 100644
> > --- a/include/linux/minmax.h
> > +++ b/include/linux/minmax.h
> > @@ -37,6 +37,27 @@
> >               __cmp(x, y, op), \
> >               __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
> >
> > +#define __clamp(val, lo, hi)                                                 \
> > +     ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val)))
> > +
> > +#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({               \
> > +             typeof(val) unique_val = (val);                                 \
> > +             typeof(lo) unique_lo = (lo);                                    \
> > +             typeof(hi) unique_hi = (hi);                                    \
> > +             __clamp(unique_val, unique_lo, unique_hi); })
> > +
> > +#define __clamp_input_check(lo, hi)                                          \
> > +        (BUILD_BUG_ON_ZERO(__builtin_choose_expr(                            \
> > +                __is_constexpr((lo) > (hi)), (lo) > (hi), false)))
>
> Nice. :)
>
> > +
> > +#define __careful_clamp(val, lo, hi) ({                                              \
> > +     __clamp_input_check(lo, hi) +                                           \
> > +     __builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) &&   \
> > +                           __typecheck(hi, lo) && __is_constexpr(val) &&     \
> > +                           __is_constexpr(lo) && __is_constexpr(hi),         \
>
> I really like it! I might have used:
>
>         __safe_cmp(val, lo) && __safe_cmp(val, hi)
>
> instead of the "open coded" __typecheck()s and __is_constexpr()s, but
> it's the same result.
>
> > +             __clamp(val, lo, hi),                                           \
> > +             __clamp_once(val, lo, hi, __UNIQUE_ID(__val), __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
>
> *complaint about line being >100 characters, but I don't really care* If
> anyone is really bothered, this looks fine, too:
>
>                 __clamp_once(val, lo, hi,                                       \
>                              __UNIQUE_ID(__val), __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); })
>
> *shrug*
>
> > +
> >  /**
> >   * min - return minimum of two values of the same or compatible types
> >   * @x: first value
> > @@ -86,7 +107,7 @@
> >   * This macro does strict typechecking of @lo/@hi to make sure they are of the
> >   * same type as @val.  See the unnecessary pointer comparisons.
> >   */
> > -#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
> > +#define clamp(val, lo, hi) __careful_clamp(val, lo, hi)
> >
> >  /*
> >   * ..and if you can't take the strict
> > @@ -121,7 +142,7 @@
> >   * This macro does no typechecking and uses temporary variables of type
> >   * @type to make all the comparisons.
> >   */
> > -#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
> > +#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi))
> >
> >  /**
> >   * clamp_val - return a value clamped to a given range using val's type
> > --
> > 2.37.3
> >
>
> Reviewed-by: Kees Cook <keescook@chromium.org>
>
> I can take this unless akpm wants it?

Fine by me.