arch/x86/include/asm/div64.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-)
Change mul_u64_u64_div_u64() to return ULONG_MAX if the result doesn't
fit u64, this matches the generic implementation in lib/math/div64.c.
Reported-by: "Li,Rongqing" <lirongqing@baidu.com>
Link: https://lore.kernel.org/all/78a0d7bb20504c0884d474868eccd858@baidu.com/
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
arch/x86/include/asm/div64.h | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9931e4c7d73f..dfd25cfd1c33 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -79,20 +79,27 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
#else
# include <asm-generic/div64.h>
+# include <asm/asm.h>
+# include <asm/bug.h>
/*
- * Will generate an #DE when the result doesn't fit u64, could fix with an
- * __ex_table[] entry when it becomes an issue.
+ * Returns ULONG_MAX when the result doesn't fit u64.
*/
static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
{
+ int ok = 0;
u64 q;
- asm ("mulq %2; divq %3" : "=a" (q)
- : "a" (a), "rm" (mul), "rm" (div)
- : "rdx");
+ asm ("mulq %3; 1: divq %4; movl $1,%1; 2:\n"
+ _ASM_EXTABLE(1b, 2b)
+ : "=a" (q), "+r" (ok)
+ : "a" (a), "rm" (mul), "rm" (div)
+ : "rdx");
- return q;
+ if (ok)
+ return q;
+ WARN_ON_ONCE(!div);
+ return ~(u64)0;
}
#define mul_u64_u64_div_u64 mul_u64_u64_div_u64
--
2.25.1.362.g51ebf55
On Mon, 21 Jul 2025 15:04:22 +0200 Oleg Nesterov <oleg@redhat.com> wrote: > Change mul_u64_u64_div_u64() to return ULONG_MAX if the result doesn't > fit u64, this matches the generic implementation in lib/math/div64.c. Not quite, the generic version is likely to trap on divide by zero. I think it would be better to always trap (eg BUG_ON(!div)). Alternatively have a function that returns (ok, overflow, infinity, NaN) to its caller - passing the buck. Although, perhaps, in a way that the the callers can ignore. The trouble there is that (an ignored) ~(u64)0 is likely to cause another arithmetic overflow with even more consequences. So I'm not at all sure what it should look like or whether 0 is a better error return (esp for div == 0). Even for the scheduler that recently trapped here, the 64bit sum was about the wrap and the very small value returned then almost certainly wouldn't have been handled correctly. (So that code needs fixing anyway to avoid the overflow.) > > Reported-by: "Li,Rongqing" <lirongqing@baidu.com> > Link: https://lore.kernel.org/all/78a0d7bb20504c0884d474868eccd858@baidu.com/ > Signed-off-by: Oleg Nesterov <oleg@redhat.com> > --- > arch/x86/include/asm/div64.h | 19 +++++++++++++------ > 1 file changed, 13 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h > index 9931e4c7d73f..dfd25cfd1c33 100644 > --- a/arch/x86/include/asm/div64.h > +++ b/arch/x86/include/asm/div64.h > @@ -79,20 +79,27 @@ static inline u64 mul_u32_u32(u32 a, u32 b) > > #else > # include <asm-generic/div64.h> > +# include <asm/asm.h> > +# include <asm/bug.h> > > /* > - * Will generate an #DE when the result doesn't fit u64, could fix with an > - * __ex_table[] entry when it becomes an issue. > + * Returns ULONG_MAX when the result doesn't fit u64. > */ > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > { > + int ok = 0; > u64 q; > > - asm ("mulq %2; divq %3" : "=a" (q) > - : "a" (a), "rm" (mul), "rm" (div) > - : "rdx"); > + asm ("mulq %3; 1: divq %4; movl $1,%1; 2:\n" The "movl $1,%1" is a 5 byte instruction. Better to use either 'incl' or get the constraints right for 'movb' (both 2 bytes). > + _ASM_EXTABLE(1b, 2b) > + : "=a" (q), "+r" (ok) > + : "a" (a), "rm" (mul), "rm" (div) > + : "rdx"); > > - return q; > + if (ok) > + return q; > + WARN_ON_ONCE(!div); I think you need to WARN for overflow as well as divide by zero. David > + return ~(u64)0; > } > #define mul_u64_u64_div_u64 mul_u64_u64_div_u64 >
On 07/21, David Laight wrote: > > On Mon, 21 Jul 2025 15:04:22 +0200 > Oleg Nesterov <oleg@redhat.com> wrote: > > > Change mul_u64_u64_div_u64() to return ULONG_MAX if the result doesn't > > fit u64, this matches the generic implementation in lib/math/div64.c. > > Not quite, the generic version is likely to trap on divide by zero. I meant that the generic implementation returns -1ul too if the result doesn't fit into u64. > I think it would be better to always trap (eg BUG_ON(!div)). Well, I don't like adding a BUG_ON(), but OK. > The trouble there is that (an ignored) ~(u64)0 is likely to cause another > arithmetic overflow with even more consequences. > > So I'm not at all sure what it should look like or whether 0 is a better > error return (esp for div == 0). I'm not sure either but x86/generic versions should be consistent. Let's discuss this and possibly change both implementations later? > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > > { > > + int ok = 0; > > u64 q; > > > > - asm ("mulq %2; divq %3" : "=a" (q) > > - : "a" (a), "rm" (mul), "rm" (div) > > - : "rdx"); > > + asm ("mulq %3; 1: divq %4; movl $1,%1; 2:\n" > > The "movl $1,%1" is a 5 byte instruction. > Better to use either 'incl' or get the constraints right for 'movb' Agreed, thanks, > > + if (ok) > > + return q; > > + WARN_ON_ONCE(!div); > > I think you need to WARN for overflow as well as divide by zero. The generic implementation doesn't WARN... OK, I won't argue. How about static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) { char ok = 0; u64 q; asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" _ASM_EXTABLE(1b, 2b) : "=a" (q), "+r" (ok) : "a" (a), "rm" (mul), "rm" (div) : "rdx"); if (ok) return q; BUG_ON(!div); WARN_ON_ONCE(1); return ~(u64)0; } ? Oleg.
On July 22, 2025 3:50:35 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: >On 07/21, David Laight wrote: >> >> On Mon, 21 Jul 2025 15:04:22 +0200 >> Oleg Nesterov <oleg@redhat.com> wrote: >> >> > Change mul_u64_u64_div_u64() to return ULONG_MAX if the result doesn't >> > fit u64, this matches the generic implementation in lib/math/div64.c. >> >> Not quite, the generic version is likely to trap on divide by zero. > >I meant that the generic implementation returns -1ul too if the result >doesn't fit into u64. > >> I think it would be better to always trap (eg BUG_ON(!div)). > >Well, I don't like adding a BUG_ON(), but OK. > >> The trouble there is that (an ignored) ~(u64)0 is likely to cause another >> arithmetic overflow with even more consequences. >> >> So I'm not at all sure what it should look like or whether 0 is a better >> error return (esp for div == 0). > >I'm not sure either but x86/generic versions should be consistent. Let's >discuss this and possibly change both implementations later? > >> > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) >> > { >> > + int ok = 0; >> > u64 q; >> > >> > - asm ("mulq %2; divq %3" : "=a" (q) >> > - : "a" (a), "rm" (mul), "rm" (div) >> > - : "rdx"); >> > + asm ("mulq %3; 1: divq %4; movl $1,%1; 2:\n" >> >> The "movl $1,%1" is a 5 byte instruction. >> Better to use either 'incl' or get the constraints right for 'movb' > >Agreed, thanks, > >> > + if (ok) >> > + return q; >> > + WARN_ON_ONCE(!div); >> >> I think you need to WARN for overflow as well as divide by zero. > >The generic implementation doesn't WARN... OK, I won't argue. >How about > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > { > char ok = 0; > u64 q; > > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" > _ASM_EXTABLE(1b, 2b) > : "=a" (q), "+r" (ok) > : "a" (a), "rm" (mul), "rm" (div) > : "rdx"); > > if (ok) > return q; > BUG_ON(!div); > WARN_ON_ONCE(1); > return ~(u64)0; > } > >? > >Oleg. > Maybe the generic version *should* warn? As far as the ok output, the Right Way™ to do it is with an asm goto instead of a status variable; the second best tends to be to use the flags output.
On 07/22, H. Peter Anvin wrote: > > On July 22, 2025 3:50:35 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: > > > >The generic implementation doesn't WARN... OK, I won't argue. > >How about > > > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > > { > > char ok = 0; > > u64 q; > > > > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" > > _ASM_EXTABLE(1b, 2b) > > : "=a" (q), "+r" (ok) > > : "a" (a), "rm" (mul), "rm" (div) > > : "rdx"); > > > > if (ok) > > return q; > > BUG_ON(!div); > > WARN_ON_ONCE(1); > > return ~(u64)0; > > } > > > >? > > > >Oleg. > > Maybe the generic version *should* warn? David is going to change the generic version to WARN. > As far as the ok output, the Right Way™ to do it is with an asm goto instead > of a status variable; the second best tends to be to use the flags output. This is what I was going to do initially. But this needs CONFIG_CC_HAS_ASM_GOTO_OUTPUT Oleg.
On July 22, 2025 10:58:08 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: >On 07/22, H. Peter Anvin wrote: >> >> On July 22, 2025 3:50:35 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: >> > >> >The generic implementation doesn't WARN... OK, I won't argue. >> >How about >> > >> > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) >> > { >> > char ok = 0; >> > u64 q; >> > >> > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" >> > _ASM_EXTABLE(1b, 2b) >> > : "=a" (q), "+r" (ok) >> > : "a" (a), "rm" (mul), "rm" (div) >> > : "rdx"); >> > >> > if (ok) >> > return q; >> > BUG_ON(!div); >> > WARN_ON_ONCE(1); >> > return ~(u64)0; >> > } >> > >> >? >> > >> >Oleg. >> >> Maybe the generic version *should* warn? > >David is going to change the generic version to WARN. > >> As far as the ok output, the Right Way™ to do it is with an asm goto instead >> of a status variable; the second best tends to be to use the flags output. > >This is what I was going to do initially. But this needs >CONFIG_CC_HAS_ASM_GOTO_OUTPUT > >Oleg. > But that's what you want to optimize for, since that is all the modern compilers, even if you have to have two versions as a result.
On 07/22, H. Peter Anvin wrote: > > On July 22, 2025 10:58:08 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: > >On 07/22, H. Peter Anvin wrote: > >> > >> On July 22, 2025 3:50:35 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: > >> > > >> >The generic implementation doesn't WARN... OK, I won't argue. > >> >How about > >> > > >> > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > >> > { > >> > char ok = 0; > >> > u64 q; > >> > > >> > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" > >> > _ASM_EXTABLE(1b, 2b) > >> > : "=a" (q), "+r" (ok) > >> > : "a" (a), "rm" (mul), "rm" (div) > >> > : "rdx"); > >> > > >> > if (ok) > >> > return q; > >> > BUG_ON(!div); > >> > WARN_ON_ONCE(1); > >> > return ~(u64)0; > >> > } > >> > > >> >? > >> > > >> >Oleg. > >> > >> Maybe the generic version *should* warn? > > > >David is going to change the generic version to WARN. > > > >> As far as the ok output, the Right Way™ to do it is with an asm goto instead > >> of a status variable; the second best tends to be to use the flags output. > > > >This is what I was going to do initially. But this needs > >CONFIG_CC_HAS_ASM_GOTO_OUTPUT > > > >Oleg. > > > > But that's what you want to optimize for, since that is all the modern compilers, even if you have to have two versions as a result. Well, this 'divq' is slow anyway, I don't won't to add 2 versions. Can we add the optimized version later if it really makes sense? Oleg.
On Tue, 22 Jul 2025 20:38:54 +0200 Oleg Nesterov <oleg@redhat.com> wrote: > On 07/22, H. Peter Anvin wrote: > > > > On July 22, 2025 10:58:08 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: > > >On 07/22, H. Peter Anvin wrote: > > >> > > >> On July 22, 2025 3:50:35 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: > > >> > > > >> >The generic implementation doesn't WARN... OK, I won't argue. > > >> >How about > > >> > > > >> > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > > >> > { > > >> > char ok = 0; > > >> > u64 q; > > >> > > > >> > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" > > >> > _ASM_EXTABLE(1b, 2b) > > >> > : "=a" (q), "+r" (ok) > > >> > : "a" (a), "rm" (mul), "rm" (div) > > >> > : "rdx"); > > >> > > > >> > if (ok) > > >> > return q; > > >> > BUG_ON(!div); > > >> > WARN_ON_ONCE(1); > > >> > return ~(u64)0; > > >> > } > > >> > > > >> >? > > >> > > > >> >Oleg. > > >> > > >> Maybe the generic version *should* warn? > > > > > >David is going to change the generic version to WARN. > > > > > >> As far as the ok output, the Right Way™ to do it is with an asm goto instead > > >> of a status variable; the second best tends to be to use the flags output. > > > > > >This is what I was going to do initially. But this needs > > >CONFIG_CC_HAS_ASM_GOTO_OUTPUT > > > > > >Oleg. > > > > > > > But that's what you want to optimize for, since that is all the modern compilers, even if you have to have two versions as a result. > > Well, this 'divq' is slow anyway, I don't won't to add 2 versions. > Can we add the optimized version later if it really makes sense? Yes, what matters more is code size and simplicity of use (by the caller). Zen3 has a reasonably fast divq, but you have to get to 'cannon lake' to get an intel one that isn't near to 100 clocks. The generic code is horrid - nearly 1000 clocks for random data running the 32bit code on sandy bridge! (I'm not sure newer will be much better). (And non-x86 without 'sh[rl]d' will be worse.) I've re-written it (patches posted a few weeks back). Sandy bridge is now (from memory) ~250 clcoks in 32bit and ~150 in 64bit, zen5 ~80 (helped by the much faster divq used for 64/64 divide). The killer is mispredicted branches, change the arguments so a slightly different path is taken and it costs at least 20 clocks. I'm timing single calls, any kind of loop trains the branch predictor (I'm not running cold cache though). Given those clock counts I rally wouldn't worry about a few integer instructions in the x86_64 path. David > > Oleg. >
On 2025-07-22 11:38, Oleg Nesterov wrote: >> >> But that's what you want to optimize for, since that is all the modern compilers, even if you have to have two versions as a result. > > Well, this 'divq' is slow anyway, I don't won't to add 2 versions. > Can we add the optimized version later if it really makes sense? > Then it will guaranteed get forgotten. -hpa
On Tue, 22 Jul 2025 12:50:35 +0200 Oleg Nesterov <oleg@redhat.com> wrote: > On 07/21, David Laight wrote: > > > > On Mon, 21 Jul 2025 15:04:22 +0200 > > Oleg Nesterov <oleg@redhat.com> wrote: > > > > > Change mul_u64_u64_div_u64() to return ULONG_MAX if the result doesn't > > > fit u64, this matches the generic implementation in lib/math/div64.c. > > > > Not quite, the generic version is likely to trap on divide by zero. > > I meant that the generic implementation returns -1ul too if the result > doesn't fit into u64. > > > I think it would be better to always trap (eg BUG_ON(!div)). > > Well, I don't like adding a BUG_ON(), but OK. > > > The trouble there is that (an ignored) ~(u64)0 is likely to cause another > > arithmetic overflow with even more consequences. > > > > So I'm not at all sure what it should look like or whether 0 is a better > > error return (esp for div == 0). > > I'm not sure either but x86/generic versions should be consistent. Let's > discuss this and possibly change both implementations later? My thought as well. Getting both to agree is a start. My latest thought is to add another parameter for the return value when the result overflows or is infinity/NaN. So the calling code can get 0, 1, ~0 (or any other 'safe' value) returned. A special 'magic' value could be used to mean BUG(). > > > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > > > { > > > + int ok = 0; > > > u64 q; > > > > > > - asm ("mulq %2; divq %3" : "=a" (q) > > > - : "a" (a), "rm" (mul), "rm" (div) > > > - : "rdx"); > > > + asm ("mulq %3; 1: divq %4; movl $1,%1; 2:\n" > > > > The "movl $1,%1" is a 5 byte instruction. > > Better to use either 'incl' or get the constraints right for 'movb' > > Agreed, thanks, > > > > + if (ok) > > > + return q; > > > + WARN_ON_ONCE(!div); > > > > I think you need to WARN for overflow as well as divide by zero. > > The generic implementation doesn't WARN... OK, I won't argue. I've a set of patches I need to do a new version of. I'll add a WARN_ON_ONCE() to the generic version. I'll also put a copy of this patch in my set so that the later patches will apply after this is applied without too much hastle. > How about > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > { > char ok = 0; > u64 q; > > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" > _ASM_EXTABLE(1b, 2b) > : "=a" (q), "+r" (ok) That needs to be "+q" (ok) > : "a" (a), "rm" (mul), "rm" (div) > : "rdx"); > > if (ok) > return q; > BUG_ON(!div); > WARN_ON_ONCE(1); I know there are are a lot of WARN_ON_ONCE(1) out there, but maybe WARN_ON_ONCE("muldiv overflow") would be better? (The linker will merge the strings). David > return ~(u64)0; > } > > ? > > Oleg. >
On July 22, 2025 5:09:47 AM PDT, David Laight <david.laight.linux@gmail.com> wrote: >On Tue, 22 Jul 2025 12:50:35 +0200 >Oleg Nesterov <oleg@redhat.com> wrote: > >> On 07/21, David Laight wrote: >> > >> > On Mon, 21 Jul 2025 15:04:22 +0200 >> > Oleg Nesterov <oleg@redhat.com> wrote: >> > >> > > Change mul_u64_u64_div_u64() to return ULONG_MAX if the result doesn't >> > > fit u64, this matches the generic implementation in lib/math/div64.c. >> > >> > Not quite, the generic version is likely to trap on divide by zero. >> >> I meant that the generic implementation returns -1ul too if the result >> doesn't fit into u64. >> >> > I think it would be better to always trap (eg BUG_ON(!div)). >> >> Well, I don't like adding a BUG_ON(), but OK. >> >> > The trouble there is that (an ignored) ~(u64)0 is likely to cause another >> > arithmetic overflow with even more consequences. >> > >> > So I'm not at all sure what it should look like or whether 0 is a better >> > error return (esp for div == 0). >> >> I'm not sure either but x86/generic versions should be consistent. Let's >> discuss this and possibly change both implementations later? > >My thought as well. >Getting both to agree is a start. > >My latest thought is to add another parameter for the return value >when the result overflows or is infinity/NaN. >So the calling code can get 0, 1, ~0 (or any other 'safe' value) returned. >A special 'magic' value could be used to mean BUG(). > >> >> > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) >> > > { >> > > + int ok = 0; >> > > u64 q; >> > > >> > > - asm ("mulq %2; divq %3" : "=a" (q) >> > > - : "a" (a), "rm" (mul), "rm" (div) >> > > - : "rdx"); >> > > + asm ("mulq %3; 1: divq %4; movl $1,%1; 2:\n" >> > >> > The "movl $1,%1" is a 5 byte instruction. >> > Better to use either 'incl' or get the constraints right for 'movb' >> >> Agreed, thanks, >> >> > > + if (ok) >> > > + return q; >> > > + WARN_ON_ONCE(!div); >> > >> > I think you need to WARN for overflow as well as divide by zero. >> >> The generic implementation doesn't WARN... OK, I won't argue. > >I've a set of patches I need to do a new version of. >I'll add a WARN_ON_ONCE() to the generic version. >I'll also put a copy of this patch in my set so that the later patches >will apply after this is applied without too much hastle. > >> How about >> >> static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) >> { >> char ok = 0; >> u64 q; >> >> asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" >> _ASM_EXTABLE(1b, 2b) >> : "=a" (q), "+r" (ok) > >That needs to be "+q" (ok) > >> : "a" (a), "rm" (mul), "rm" (div) >> : "rdx"); >> >> if (ok) >> return q; >> BUG_ON(!div); >> WARN_ON_ONCE(1); > >I know there are are a lot of WARN_ON_ONCE(1) out there, >but maybe WARN_ON_ONCE("muldiv overflow") would be better? >(The linker will merge the strings). > > David > >> return ~(u64)0; >> } >> >> ? >> >> Oleg. >> > Note that -1 for division by zero (not necessarily for overflow) follows from most natural division algorithms, and so architectures which don't trap on division overflow tend to behave that way.
On Tue, 22 Jul 2025 09:53:21 -0700 "H. Peter Anvin" <hpa@zytor.com> wrote: ... > > Note that -1 for division by zero (not necessarily for overflow) follows from > most natural division algorithms, and so architectures which don't trap > on division overflow tend to behave that way. Do you think that just returning ~0 is enough (maybe with a WARN_ONCE) ? Code can check for it and the number of false positives would be minimal. I have wondered about having muldiv(a, b, c, d, errval) that returns 'errval' if '(a * b + c)/ d' overflows or if 'd' is zero. Otherwise you are trying to return too many values from a function (esp. on 32bit). 'asm goto' or 'flags' will only work for x86_64, starts being a real mess for the generic code. (Time for a language feature that lets the caller see modifications to a parameter that is passed by value...) David
On 07/22, David Laight wrote: > > On Tue, 22 Jul 2025 12:50:35 +0200 > Oleg Nesterov <oleg@redhat.com> wrote: > > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > > { > > char ok = 0; > > u64 q; > > > > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" > > _ASM_EXTABLE(1b, 2b) > > : "=a" (q), "+r" (ok) > > That needs to be "+q" (ok) Thanks... I will never understand the asm constraints. > > if (ok) > > return q; > > BUG_ON(!div); > > WARN_ON_ONCE(1); > > I know there are are a lot of WARN_ON_ONCE(1) out there, > but maybe WARN_ON_ONCE("muldiv overflow") would be better? > (The linker will merge the strings). OK. If you are fine with this version I'll send V2. /* * Returns ULONG_MAX when the result doesn't fit u64. */ static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) { char ok = 0; u64 q; asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" _ASM_EXTABLE(1b, 2b) : "=a" (q), "+q" (ok) : "a" (a), "rm" (mul), "rm" (div) : "rdx"); if (ok) return q; BUG_ON(!div); WARN_ONCE(1, "muldiv overflow.\n"); return ~(u64)0; } Oleg.
On Tue, 22 Jul 2025 15:21:48 +0200 Oleg Nesterov <oleg@redhat.com> wrote: > On 07/22, David Laight wrote: > > > > On Tue, 22 Jul 2025 12:50:35 +0200 > > Oleg Nesterov <oleg@redhat.com> wrote: > > > > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > > > { > > > char ok = 0; > > > u64 q; > > > > > > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" > > > _ASM_EXTABLE(1b, 2b) > > > : "=a" (q), "+r" (ok) > > > > That needs to be "+q" (ok) > > Thanks... I will never understand the asm constraints. I'm not sure I understand all of them. But the previous version wouldn't compile. > > > > if (ok) > > > return q; > > > BUG_ON(!div); > > > WARN_ON_ONCE(1); > > > > I know there are are a lot of WARN_ON_ONCE(1) out there, > > but maybe WARN_ON_ONCE("muldiv overflow") would be better? > > (The linker will merge the strings). > > OK. If you are fine with this version I'll send V2. > > /* > * Returns ULONG_MAX when the result doesn't fit u64. > */ > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > { > char ok = 0; > u64 q; > > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" > _ASM_EXTABLE(1b, 2b) > : "=a" (q), "+q" (ok) > : "a" (a), "rm" (mul), "rm" (div) > : "rdx"); > > if (ok) > return q; > BUG_ON(!div); > WARN_ONCE(1, "muldiv overflow.\n"); I wonder what WARN_ON_ONCE("muldiv overflow") outputs? Actually, without the BUG or WARN you want: u64 fail = ~(u64)0; then incq $1 ... "+r" (fail) and finally return q | fail; to remove the conditional branches from the normal path (apart from one the caller might do) David > return ~(u64)0; > } > > Oleg. >
On 07/22, David Laight wrote: > > On Tue, 22 Jul 2025 15:21:48 +0200 > Oleg Nesterov <oleg@redhat.com> wrote: > > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > > { > > char ok = 0; > > u64 q; > > > > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" > > _ASM_EXTABLE(1b, 2b) > > : "=a" (q), "+q" (ok) > > : "a" (a), "rm" (mul), "rm" (div) > > : "rdx"); > > > > if (ok) > > return q; > > BUG_ON(!div); > > WARN_ONCE(1, "muldiv overflow.\n"); > > I wonder what WARN_ON_ONCE("muldiv overflow") outputs? Well, it outputs "muldiv overflow." ;) So I am not sure it is better than just WARN_ON_ONCE(1). > Actually, without the BUG or WARN you want: > u64 fail = ~(u64)0; > then > incq $1 ... "+r" (fail) > and finally > return q | fail; > to remove the conditional branches from the normal path > (apart from one the caller might do) I was thinking about static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) { u64 q; asm ("mulq %2; 1: divq %3; jmp 3f; 2: movq $-1,%0; 3:\n" _ASM_EXTABLE(1b, 2b) : "=a" (q) : "a" (a), "rm" (mul), "rm" (div) : "rdx"); return q; } to remove the conditional branch and additional variable. Your version is probably beterr... But this is without WARN/BUG. So, which version do you prefer? Oleg.
On Wed, 23 Jul 2025 11:38:25 +0200 Oleg Nesterov <oleg@redhat.com> wrote: > On 07/22, David Laight wrote: > > > > On Tue, 22 Jul 2025 15:21:48 +0200 > > Oleg Nesterov <oleg@redhat.com> wrote: > > > > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > > > { > > > char ok = 0; > > > u64 q; > > > > > > asm ("mulq %3; 1: divq %4; movb $1,%1; 2:\n" > > > _ASM_EXTABLE(1b, 2b) > > > : "=a" (q), "+q" (ok) > > > : "a" (a), "rm" (mul), "rm" (div) > > > : "rdx"); > > > > > > if (ok) > > > return q; > > > BUG_ON(!div); > > > WARN_ONCE(1, "muldiv overflow.\n"); > > > > I wonder what WARN_ON_ONCE("muldiv overflow") outputs? > > Well, it outputs "muldiv overflow." ;) So I am not sure it is better > than just WARN_ON_ONCE(1). > > > Actually, without the BUG or WARN you want: > > u64 fail = ~(u64)0; > > then > > incq $1 ... "+r" (fail) > > and finally > > return q | fail; > > to remove the conditional branches from the normal path > > (apart from one the caller might do) > > I was thinking about > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > { > u64 q; > > asm ("mulq %2; 1: divq %3; jmp 3f; 2: movq $-1,%0; 3:\n" > _ASM_EXTABLE(1b, 2b) > : "=a" (q) > : "a" (a), "rm" (mul), "rm" (div) > : "rdx"); > > return q; > } > > to remove the conditional branch and additional variable. Your version > is probably beterr... But this is without WARN/BUG. I wish there was a way of doing a WARN_ONCE from asm with a single instruction. Then you could put one after your 2: Otherwise is it a conditional and a load of inlined code. > So, which version do you prefer? I wish I knew :-) Yours is a few bytes shorter, uses one less register, but has that unconditional jmp. I suspect we don't worry about the cpu not predicting a jump - especially with the divq. It's not as though some real-time code relies on this code being as fast as absolutely possible. Not using a register is probably the main win. So maybe I lose (this time). Further work could add an 'int *' parameter that is set non-zero (from %rax) if the divide traps; optimised out if NULL. The easy way is two copies of the asm statement. But I've already got two copies in the version that does (a * b + c)/d and four copies is getting silly. Actually this seems ok - at least as a real function: u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 div) { unsigned __int128 v = (__int128)a * b + c; asm ("1: divq %1; jmp 3f; 2: movq $-1,%%rax; 3:\n" _ASM_EXTABLE(1b, 2b) : "+A" (v) : "r" (div)); return v; } But (as I found with 32bit) gcc can decide to do a 128x128 multiply. It does do a full 128bit add - with an extra register for the zero. Not that you should never pass "rm" to clang, needs to be "r". There is a #define for it. David > > Oleg. >
On 07/23, David Laight wrote: > > On Wed, 23 Jul 2025 11:38:25 +0200 > Oleg Nesterov <oleg@redhat.com> wrote: > > > > to remove the conditional branch and additional variable. Your version > > is probably beterr... But this is without WARN/BUG. > > I wish there was a way of doing a WARN_ONCE from asm with a single instruction. > Then you could put one after your 2: > Otherwise is it a conditional and a load of inlined code. > > > So, which version do you prefer? > > I wish I knew :-) ;-) David, you understand this asm magic indefinitely better than me. Plus you are working on the generic code. Can you send the patch which looks right to you? I agree in advance with anything you do. I got lost. Now I don't even understand if we want to add BUG and/or WARN into mul_u64_u64_div_u64(). Thanks, Oleg.
On 07/24, Oleg Nesterov wrote: > On 07/23, David Laight wrote: > > > > On Wed, 23 Jul 2025 11:38:25 +0200 > > Oleg Nesterov <oleg@redhat.com> wrote: > > > > > > to remove the conditional branch and additional variable. Your version > > > is probably beterr... But this is without WARN/BUG. > > > > I wish there was a way of doing a WARN_ONCE from asm with a single instruction. > > Then you could put one after your 2: > > Otherwise is it a conditional and a load of inlined code. > > > > > So, which version do you prefer? > > > > I wish I knew :-) > > ;-) > > David, you understand this asm magic indefinitely better than me. Plus you are > working on the generic code. Can you send the patch which looks right to you? > I agree in advance with anything you do. > > I got lost. Now I don't even understand if we want to add BUG and/or WARN into > mul_u64_u64_div_u64(). Forgot to mention... Not that I think this is a good idea, but if we don't use BUG/WARN, we can probably add EX_FLAG_ and do something like below. Oleg. --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -38,6 +38,9 @@ static bool ex_handler_default(const struct exception_table_entry *e, if (e->data & EX_FLAG_CLEAR_DX) regs->dx = 0; + if (e->data & EX_FLAG_XXX_AX) + regs->ax = -1ul; + regs->ip = ex_fixup_addr(e); return true; }
On Thu, 24 Jul 2025 10:25:48 +0200 Oleg Nesterov <oleg@redhat.com> wrote: > On 07/24, Oleg Nesterov wrote: > > On 07/23, David Laight wrote: > > > > > > On Wed, 23 Jul 2025 11:38:25 +0200 > > > Oleg Nesterov <oleg@redhat.com> wrote: > > > > > > > > to remove the conditional branch and additional variable. Your version > > > > is probably beterr... But this is without WARN/BUG. > > > > > > I wish there was a way of doing a WARN_ONCE from asm with a single instruction. > > > Then you could put one after your 2: > > > Otherwise is it a conditional and a load of inlined code. > > > > > > > So, which version do you prefer? > > > > > > I wish I knew :-) > > > > ;-) > > > > David, you understand this asm magic indefinitely better than me. Plus you are > > working on the generic code. Can you send the patch which looks right to you? > > I agree in advance with anything you do. > > > > I got lost. Now I don't even understand if we want to add BUG and/or WARN into > > mul_u64_u64_div_u64(). > > Forgot to mention... Not that I think this is a good idea, but if we don't > use BUG/WARN, we can probably add EX_FLAG_ and do something like below. I'd not looked there. That is certainly best if WARN/BUG is deemed unnecessary. (That is the type of question I'd defer to 'management'!) > > Oleg. > > --- a/arch/x86/mm/extable.c > +++ b/arch/x86/mm/extable.c > @@ -38,6 +38,9 @@ static bool ex_handler_default(const struct exception_table_entry *e, > if (e->data & EX_FLAG_CLEAR_DX) > regs->dx = 0; > > + if (e->data & EX_FLAG_XXX_AX) > + regs->ax = -1ul; That would need to set %eax to a 64bit ~0u; I don't think the above would sign extend the value. Makes me think - always bad. I wonder how hard it would be to implement EX_FLAG_WARN_ONCE. Mostly it would need a writeable bitmap with one bit for each extable entry. David > + > regs->ip = ex_fixup_addr(e); > return true; > } >
On 07/24, David Laight wrote: > > On Thu, 24 Jul 2025 10:25:48 +0200 > Oleg Nesterov <oleg@redhat.com> wrote: > > > --- a/arch/x86/mm/extable.c > > +++ b/arch/x86/mm/extable.c > > @@ -38,6 +38,9 @@ static bool ex_handler_default(const struct exception_table_entry *e, > > if (e->data & EX_FLAG_CLEAR_DX) > > regs->dx = 0; > > > > + if (e->data & EX_FLAG_XXX_AX) > > + regs->ax = -1ul; > > That would need to set %eax to a 64bit ~0u; > I don't think the above would sign extend the value. Hmm... could you spell please? pt_regs->ax is always 'unsigned long', regardless of bitness... > Makes me think - always bad. > I wonder how hard it would be to implement EX_FLAG_WARN_ONCE. > Mostly it would need a writeable bitmap with one bit for each > extable entry. Would be nice... But who else will use this feature ? ;) I mean, it needs some justification. Oleg.
Finally. If we really want to optimize this function as much as possible, we can add the CONFIG_CC_HAS_ASM_GOTO_OUTPUT version as Peter suggests. I guess this should work: u64 test(u64 a, u64 mul, u64 div) { u64 q; asm goto ("mulq %2; 1: divq %3\n" _ASM_EXTABLE(1b, %l[fail]) : "=a" (q) : "a" (a), "rm" (mul), "rm" (div) : "rdx" : fail); return q; fail: // BUG? WARN? return -1ul; } I agree with everything ;) Oleg. On 07/24, Oleg Nesterov wrote: > > On 07/24, Oleg Nesterov wrote: > > On 07/23, David Laight wrote: > > > > > > On Wed, 23 Jul 2025 11:38:25 +0200 > > > Oleg Nesterov <oleg@redhat.com> wrote: > > > > > > > > to remove the conditional branch and additional variable. Your version > > > > is probably beterr... But this is without WARN/BUG. > > > > > > I wish there was a way of doing a WARN_ONCE from asm with a single instruction. > > > Then you could put one after your 2: > > > Otherwise is it a conditional and a load of inlined code. > > > > > > > So, which version do you prefer? > > > > > > I wish I knew :-) > > > > ;-) > > > > David, you understand this asm magic indefinitely better than me. Plus you are > > working on the generic code. Can you send the patch which looks right to you? > > I agree in advance with anything you do. > > > > I got lost. Now I don't even understand if we want to add BUG and/or WARN into > > mul_u64_u64_div_u64(). > > Forgot to mention... Not that I think this is a good idea, but if we don't > use BUG/WARN, we can probably add EX_FLAG_ and do something like below. > > Oleg. > > --- a/arch/x86/mm/extable.c > +++ b/arch/x86/mm/extable.c > @@ -38,6 +38,9 @@ static bool ex_handler_default(const struct exception_table_entry *e, > if (e->data & EX_FLAG_CLEAR_DX) > regs->dx = 0; > > + if (e->data & EX_FLAG_XXX_AX) > + regs->ax = -1ul; > + > regs->ip = ex_fixup_addr(e); > return true; > }
On July 24, 2025 4:14:26 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: >Finally. If we really want to optimize this function as much as possible, >we can add the CONFIG_CC_HAS_ASM_GOTO_OUTPUT version as Peter suggests. >I guess this should work: > > u64 test(u64 a, u64 mul, u64 div) > { > u64 q; > > asm goto ("mulq %2; 1: divq %3\n" > _ASM_EXTABLE(1b, %l[fail]) > : "=a" (q) > : "a" (a), "rm" (mul), "rm" (div) > : "rdx" > : fail); > > return q; > fail: > // BUG? WARN? > return -1ul; > } > >I agree with everything ;) > >Oleg. > >On 07/24, Oleg Nesterov wrote: >> >> On 07/24, Oleg Nesterov wrote: >> > On 07/23, David Laight wrote: >> > > >> > > On Wed, 23 Jul 2025 11:38:25 +0200 >> > > Oleg Nesterov <oleg@redhat.com> wrote: >> > > > >> > > > to remove the conditional branch and additional variable. Your version >> > > > is probably beterr... But this is without WARN/BUG. >> > > >> > > I wish there was a way of doing a WARN_ONCE from asm with a single instruction. >> > > Then you could put one after your 2: >> > > Otherwise is it a conditional and a load of inlined code. >> > > >> > > > So, which version do you prefer? >> > > >> > > I wish I knew :-) >> > >> > ;-) >> > >> > David, you understand this asm magic indefinitely better than me. Plus you are >> > working on the generic code. Can you send the patch which looks right to you? >> > I agree in advance with anything you do. >> > >> > I got lost. Now I don't even understand if we want to add BUG and/or WARN into >> > mul_u64_u64_div_u64(). >> >> Forgot to mention... Not that I think this is a good idea, but if we don't >> use BUG/WARN, we can probably add EX_FLAG_ and do something like below. >> >> Oleg. >> >> --- a/arch/x86/mm/extable.c >> +++ b/arch/x86/mm/extable.c >> @@ -38,6 +38,9 @@ static bool ex_handler_default(const struct exception_table_entry *e, >> if (e->data & EX_FLAG_CLEAR_DX) >> regs->dx = 0; >> >> + if (e->data & EX_FLAG_XXX_AX) >> + regs->ax = -1ul; >> + >> regs->ip = ex_fixup_addr(e); >> return true; >> } > Seems good to me.
On 07/24, H. Peter Anvin wrote: > > On July 24, 2025 4:14:26 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: > >Finally. If we really want to optimize this function as much as possible, > >we can add the CONFIG_CC_HAS_ASM_GOTO_OUTPUT version as Peter suggests. > >I guess this should work: ... > >> Forgot to mention... Not that I think this is a good idea, but if we don't > >> use BUG/WARN, we can probably add EX_FLAG_ and do something like below. ... > Seems good to me. Thanks, but which one? "asm goto" or EX_FLAG_XXX_AX hack? As for the latter. I took another look at asm/extable_fixup_types.h and it turns out we don't need a new EX_FLAG_, this version static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) { u64 q; asm ("mulq %2; 1: divq %3; 2:\n" _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_IMM_REG | EX_DATA_IMM(-1)) : "=a" (q) : "a" (a), "rm" (mul), "rm" (div) : "rdx"); return q; } seems to work and I guess it is the absolute winner performance wise. But to me the main question is: Peter, David, do we want to add BUG and/or WARN into mul_u64_u64_div_u64??? If yes, then this version won't work. Oleg.
On Fri, 25 Jul 2025 12:12:02 +0200 Oleg Nesterov <oleg@redhat.com> wrote: > On 07/24, H. Peter Anvin wrote: > > > > On July 24, 2025 4:14:26 AM PDT, Oleg Nesterov <oleg@redhat.com> wrote: > > >Finally. If we really want to optimize this function as much as possible, > > >we can add the CONFIG_CC_HAS_ASM_GOTO_OUTPUT version as Peter suggests. > > >I guess this should work: > > ... > > > >> Forgot to mention... Not that I think this is a good idea, but if we don't > > >> use BUG/WARN, we can probably add EX_FLAG_ and do something like below. > > ... > > > Seems good to me. > > Thanks, but which one? "asm goto" or EX_FLAG_XXX_AX hack? > > As for the latter. I took another look at asm/extable_fixup_types.h > and it turns out we don't need a new EX_FLAG_, this version > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > { > u64 q; > > asm ("mulq %2; 1: divq %3; 2:\n" > _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_IMM_REG | EX_DATA_IMM(-1)) That should be _ASM_EXTABLE_TYPE_REG() with an extra %%rax parameter. It works because ax is register zero. > : "=a" (q) > : "a" (a), "rm" (mul), "rm" (div) The "rm" should both be ASM_INPUT_RM > : "rdx"); > > return q; > } > > seems to work and I guess it is the absolute winner performance wise. > > But to me the main question is: Peter, David, do we want to add > BUG and/or WARN into mul_u64_u64_div_u64??? If yes, then this version > won't work. Looking through the code in extable.[ch] there is actually scope for adding an extra EX_TYPE that is the same as IMM_REG but contains a WARN_ONCE(). It would be a 'global' ONCE for all such traps, but that probably doesn't matter. The easiest way to make it 'per trap site' is to modify the 'exception_table_entry' itself (eg change the type), but that might not be acceptable. An alternative would be a special EX_TYPE_DIVIDE_OVERFLOW with an explicit message. Probably best to leave all that for later. Perhaps more useful would be adding an 'int *overflow' argument. If not a compile-time NULL then set to 0 on entry and -1 if there is a trap. That would need the 'jmp in the normal path' asm version - so two copies of the entire asm block. Another one for a later patch. David > > Oleg. >
On 07/25, David Laight wrote: > > On Fri, 25 Jul 2025 12:12:02 +0200 > Oleg Nesterov <oleg@redhat.com> wrote: > > > As for the latter. I took another look at asm/extable_fixup_types.h > > and it turns out we don't need a new EX_FLAG_, this version > > > > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > > { > > u64 q; > > > > asm ("mulq %2; 1: divq %3; 2:\n" > > _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_IMM_REG | EX_DATA_IMM(-1)) > > That should be _ASM_EXTABLE_TYPE_REG() with an extra %%rax parameter. Not at all. EX_TYPE_IMM_REG needs a simple EX_DATA_REG(). > It works because ax is register zero. Yes, just like other users of EX_TYPE_IMM_REG which use the "default" regs->ax. But I won't mind to add the unnecessary EX_DATA_REG(0) if you want it. > > : "a" (a), "rm" (mul), "rm" (div) > > The "rm" should both be ASM_INPUT_RM You have already mentioned this before, but I disagree. I mean, this needs another patch. IIUC, this change is not needed for correctness, and the same arch/x86/include/asm/div64.h file has a lot more "rm"'s. So that patch should probably change them all and check the generated code with clang. > > But to me the main question is: Peter, David, do we want to add > > BUG and/or WARN into mul_u64_u64_div_u64??? If yes, then this version > > won't work. > > Looking through the code in extable.[ch] there is actually scope for adding > an extra EX_TYPE that is the same as IMM_REG but contains a WARN_ONCE(). I thought about this too. I think a new EX_FLAG_ makes more sense. > It would be a 'global' ONCE for all such traps, Yes, unfortunately. But lets discuss this later. Oleg.
Change mul_u64_u64_div_u64() to return ULONG_MAX if the result doesn't
fit into u64 or div == 0. The former matches the generic implementation
in lib/math/div64.c, the latter doesn't. Perhaps we will add a WARN()
into the fixup_exception() paths later.
No need to use _ASM_EXTABLE_TYPE_REG(), we know that the target register
is pt_regs->ax with offset == 0, so a simple EX_DATA_REG(0) should work
just fine.
Reported-by: Li RongQing <lirongqing@baidu.com>
Link: https://lore.kernel.org/all/78a0d7bb20504c0884d474868eccd858@baidu.com/
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
arch/x86/include/asm/div64.h | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9931e4c7d73f..0bf2c6afe66e 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -79,18 +79,21 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
#else
# include <asm-generic/div64.h>
+# include <asm/asm.h>
/*
- * Will generate an #DE when the result doesn't fit u64, could fix with an
- * __ex_table[] entry when it becomes an issue.
+ * Returns ULONG_MAX if the result doesn't fit u64 or div == 0.
*/
static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
{
u64 q;
- asm ("mulq %2; divq %3" : "=a" (q)
- : "a" (a), "rm" (mul), "rm" (div)
- : "rdx");
+ asm ("mulq %2; 1: divq %3; 2:\n"
+ _ASM_EXTABLE_TYPE(1b, 2b,
+ EX_TYPE_IMM_REG | EX_DATA_REG(0) | EX_DATA_IMM(-1))
+ : "=a" (q)
+ : "a" (a), "rm" (mul), "rm" (div)
+ : "rdx");
return q;
}
--
2.25.1.362.g51ebf55
> Change mul_u64_u64_div_u64() to return ULONG_MAX if the result doesn't fit > into u64 or div == 0. The former matches the generic implementation in > lib/math/div64.c, the latter doesn't. Perhaps we will add a WARN() into the > fixup_exception() paths later. > > No need to use _ASM_EXTABLE_TYPE_REG(), we know that the target register > is pt_regs->ax with offset == 0, so a simple EX_DATA_REG(0) should work just > fine. > > Reported-by: Li RongQing <lirongqing@baidu.com> > Link: > https://lore.kernel.org/all/78a0d7bb20504c0884d474868eccd858@baidu.com/ > Signed-off-by: Oleg Nesterov <oleg@redhat.com> > --- Tested-by: Li RongQing <lirongqing@baidu.com> Thanks for fixing Br
On Sun, 27 Jul 2025 14:34:58 +0200 Oleg Nesterov <oleg@redhat.com> wrote: > Change mul_u64_u64_div_u64() to return ULONG_MAX if the result doesn't > fit into u64 or div == 0. The former matches the generic implementation > in lib/math/div64.c, the latter doesn't. Perhaps we will add a WARN() > into the fixup_exception() paths later. > > No need to use _ASM_EXTABLE_TYPE_REG(), we know that the target register > is pt_regs->ax with offset == 0, so a simple EX_DATA_REG(0) should work > just fine. > > Reported-by: Li RongQing <lirongqing@baidu.com> > Link: https://lore.kernel.org/all/78a0d7bb20504c0884d474868eccd858@baidu.com/ > Signed-off-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: David Laight <david.laight.linux@gmail.com> > --- > arch/x86/include/asm/div64.h | 13 ++++++++----- > 1 file changed, 8 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h > index 9931e4c7d73f..0bf2c6afe66e 100644 > --- a/arch/x86/include/asm/div64.h > +++ b/arch/x86/include/asm/div64.h > @@ -79,18 +79,21 @@ static inline u64 mul_u32_u32(u32 a, u32 b) > > #else > # include <asm-generic/div64.h> > +# include <asm/asm.h> > > /* > - * Will generate an #DE when the result doesn't fit u64, could fix with an > - * __ex_table[] entry when it becomes an issue. > + * Returns ULONG_MAX if the result doesn't fit u64 or div == 0. > */ > static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) > { > u64 q; > > - asm ("mulq %2; divq %3" : "=a" (q) > - : "a" (a), "rm" (mul), "rm" (div) > - : "rdx"); > + asm ("mulq %2; 1: divq %3; 2:\n" > + _ASM_EXTABLE_TYPE(1b, 2b, > + EX_TYPE_IMM_REG | EX_DATA_REG(0) | EX_DATA_IMM(-1)) > + : "=a" (q) > + : "a" (a), "rm" (mul), "rm" (div) > + : "rdx"); > > return q; > }
© 2016 - 2025 Red Hat, Inc.