Add the trait `ParseInt` for parsing string representations of integers
where the string representations are optionally prefixed by a radix
specifier. Implement the trait for the primitive integer types.
Tested-by: Daniel Gomez <da.gomez@samsung.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
rust/kernel/str.rs | 2 +
rust/kernel/str/parse_int.rs | 171 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 173 insertions(+)
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index a927db8e079c..2b6c8b4a0ae4 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -8,6 +8,8 @@
use crate::prelude::*;
+pub mod parse_int;
+
/// Byte string without UTF-8 validity guarantee.
#[repr(transparent)]
pub struct BStr([u8]);
diff --git a/rust/kernel/str/parse_int.rs b/rust/kernel/str/parse_int.rs
new file mode 100644
index 000000000000..0754490aec4b
--- /dev/null
+++ b/rust/kernel/str/parse_int.rs
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Integer parsing functions.
+//!
+//! Integer parsing functions for parsing signed and unsigned integers
+//! potentially prefixed with `0x`, `0o`, or `0b`.
+
+use crate::prelude::*;
+use crate::str::BStr;
+use core::ops::Deref;
+
+// `FromStrRadix` is a public trait inside a private module. Because it is
+// a supertrait of `ParseInt`, downstream users cannot name it, which
+// seals `ParseInt`: nobody outside of this file can implement it.
+mod private {
+ use crate::str::BStr;
+
+ /// Radix-aware integer parsing from a [`&BStr`], plus the helpers `ParseInt` needs.
+ ///
+ /// # Safety
+ ///
+ /// The member functions of this trait must be implemented according to
+ /// their documentation.
+ ///
+ /// [`&BStr`]: kernel::str::BStr
+ // This trait is required because the `from_str_radix` function on the
+ // primitive integer types is not part of any trait.
+ pub unsafe trait FromStrRadix: Sized {
+ /// The smallest value this integer type can represent.
+ const MIN: Self;
+
+ /// Parse the digits in `src` as a value of [`Self`] in base `radix`.
+ fn from_str_radix(src: &BStr, radix: u32) -> Result<Self, crate::error::Error>;
+
+ /// Return the magnitude of [`Self::MIN`] as a `u64` (`0` for unsigned types).
+ fn abs_min() -> u64;
+
+ /// Return the 2's complement of `self`, that is `!self + 1` with wrapping.
+ ///
+ /// Note: This function does not make sense for unsigned integers.
+ fn complement(self) -> Self;
+ }
+}
+
+/// Split an integer literal into its radix and the text that is left to parse.
+///
+/// `0x`/`0X` selects radix 16, `0o`/`0O` radix 8 and `0b`/`0B` radix 2; these
+/// two-byte prefixes are removed from the returned string. Any other input
+/// starting with `0` is octal and everything else is decimal; in both of
+/// these cases `src` is returned unchanged.
+fn strip_radix(src: &BStr) -> (u32, &BStr) {
+    let bytes: &[u8] = src.deref();
+
+    if let [b'0', marker, digits @ ..] = bytes {
+        let prefixed = match marker.to_ascii_lowercase() {
+            b'x' => Some(16),
+            b'o' => Some(8),
+            b'b' => Some(2),
+            _ => None,
+        };
+        if let Some(radix) = prefixed {
+            return (radix, digits.as_ref());
+        }
+    }
+
+    // NOTE: The leading zero of an octal literal is kept. If it were removed
+    // as a radix prefix, the literal `0` would turn into the empty string
+    // and could not be parsed.
+    let radix = if bytes.first() == Some(&b'0') { 8 } else { 10 };
+    (radix, src)
+}
+
+/// Trait for parsing string representations of integers.
+///
+/// Strings beginning with `0x`, `0o`, or `0b` are parsed as hex, octal, or
+/// binary respectively. Strings beginning with `0` otherwise are parsed as
+/// octal. Anything else is parsed as decimal. A single leading `+` or `-`,
+/// placed before the radix prefix, is also permitted. Any string parsed by
+/// [`kstrtol()`] or [`kstrtoul()`] will be successfully parsed.
+///
+/// [`kstrtol()`]: https://docs.kernel.org/core-api/kernel-api.html#c.kstrtol
+/// [`kstrtoul()`]: https://docs.kernel.org/core-api/kernel-api.html#c.kstrtoul
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::str::parse_int::ParseInt;
+/// # use kernel::b_str;
+///
+/// assert_eq!(Ok(0u8), u8::from_str(b_str!("0")));
+///
+/// assert_eq!(Ok(0xa2u8), u8::from_str(b_str!("0xa2")));
+/// assert_eq!(Ok(-0xa2i32), i32::from_str(b_str!("-0xa2")));
+///
+/// assert_eq!(Ok(-0o57i8), i8::from_str(b_str!("-0o57")));
+/// assert_eq!(Ok(0o57i8), i8::from_str(b_str!("057")));
+///
+/// assert_eq!(Ok(0b1001i16), i16::from_str(b_str!("0b1001")));
+/// assert_eq!(Ok(-0b1001i16), i16::from_str(b_str!("-0b1001")));
+///
+/// assert_eq!(Ok(127i8), i8::from_str(b_str!("127")));
+/// assert!(i8::from_str(b_str!("128")).is_err());
+/// assert_eq!(Ok(-128i8), i8::from_str(b_str!("-128")));
+/// assert!(i8::from_str(b_str!("-129")).is_err());
+/// assert_eq!(Ok(255u8), u8::from_str(b_str!("255")));
+/// assert!(u8::from_str(b_str!("256")).is_err());
+///
+/// // The sign comes first and does not influence radix detection.
+/// assert_eq!(Ok(0x10u8), u8::from_str(b_str!("+0x10")));
+/// assert_eq!(Ok(0o10u8), u8::from_str(b_str!("+010")));
+///
+/// // Only one sign is accepted, and only in front of the radix prefix.
+/// assert!(i32::from_str(b_str!("-+5")).is_err());
+/// assert!(i32::from_str(b_str!("0x-5")).is_err());
+/// ```
+pub trait ParseInt: private::FromStrRadix + TryFrom<u64> {
+    /// Parse a string according to the description in [`Self`].
+    ///
+    /// # Errors
+    ///
+    /// Returns `EINVAL` if `src` is not a valid integer literal or if its
+    /// value cannot be represented by `Self`.
+    fn from_str(src: &BStr) -> Result<Self> {
+        // Consume at most one sign *before* looking for the radix prefix, so
+        // that `+0x10` and `+010` select the same radix as `0x10` and `010`.
+        let (negative, unsigned): (bool, &BStr) = match src.deref() {
+            [b'-', rest @ ..] => (true, rest.as_ref()),
+            [b'+', rest @ ..] => (false, rest.as_ref()),
+            _ => (false, src),
+        };
+        let (radix, digits) = strip_radix(unsigned);
+
+        // The `from_str_radix` functions of the primitive integers accept a
+        // sign of their own. Ours has been consumed already, so a second one
+        // (as in `-+5` or `0x-5`) makes the literal malformed.
+        if matches!(digits.deref(), [b'+' | b'-', ..]) {
+            return Err(EINVAL);
+        }
+
+        if !negative {
+            return Self::from_str_radix(digits, radix).map_err(|_| EINVAL);
+        }
+
+        // 2's complement values range from -2^(b-1) to 2^(b-1)-1.
+        // So if we want to parse negative numbers as positive and
+        // later multiply by -1, we have to parse into a larger
+        // integer. We choose `u64` as sufficiently large.
+        //
+        // NOTE: 128 bit integers are not available on all
+        // platforms, hence the choice of 64 bits.
+        let val = u64::from_str_radix(core::str::from_utf8(digits).map_err(|_| EINVAL)?, radix)
+            .map_err(|_| EINVAL)?;
+
+        if val > Self::abs_min() {
+            return Err(EINVAL);
+        }
+
+        if val == Self::abs_min() {
+            return Ok(Self::MIN);
+        }
+
+        // `val < Self::abs_min()` holds here, so the conversion succeeds for
+        // every primitive integer. It is still done with a checked
+        // conversion: in this generic context nothing connects `abs_min()`
+        // to the `TryFrom<u64>` implementation of `Self`, so an unchecked
+        // unwrap could not be justified.
+        let val = Self::try_from(val).map_err(|_| EINVAL)?;
+
+        Ok(val.complement())
+    }
+}
+
+// Implements `FromStrRadix` and `ParseInt` for each primitive integer type
+// in the comma-separated list.
+macro_rules! impl_parse_int {
+    ($($int:ty),* $(,)?) => {
+        $(
+            // SAFETY: We implement the trait according to the documentation.
+            unsafe impl private::FromStrRadix for $int {
+                const MIN: Self = <$int>::MIN;
+
+                fn from_str_radix(src: &BStr, radix: u32) -> Result<Self, crate::error::Error> {
+                    // The primitive parser works on `&str`, so the bytes
+                    // have to be valid UTF-8 first.
+                    let text = core::str::from_utf8(src).map_err(|_| EINVAL)?;
+                    <$int>::from_str_radix(text, radix).map_err(|_| EINVAL)
+                }
+
+                fn abs_min() -> u64 {
+                    match <$int>::MIN {
+                        // Unsigned types: the minimum is zero.
+                        0 => 0,
+                        // Signed types: `|MIN|` is `2^(BITS - 1)`.
+                        _ => 1u64 << (<$int>::BITS - 1),
+                    }
+                }
+
+                fn complement(self) -> Self {
+                    // Wrapping negation is `!self + 1` with wrap-around.
+                    self.wrapping_neg()
+                }
+            }
+
+            impl ParseInt for $int {}
+        )*
+    };
+}
+
+impl_parse_int!(i8, u8, i16, u16, i32, u32, i64, u64, isize, usize);
--
2.47.2
On Thu Jun 12, 2025 at 3:40 PM CEST, Andreas Hindborg wrote: > +pub trait ParseInt: private::FromStrRadix + TryFrom<u64> { > + /// Parse a string according to the description in [`Self`]. > + fn from_str(src: &BStr) -> Result<Self> { > + match src.deref() { > + [b'-', rest @ ..] => { > + let (radix, digits) = strip_radix(rest.as_ref()); > + // 2's complement values range from -2^(b-1) to 2^(b-1)-1. > + // So if we want to parse negative numbers as positive and > + // later multiply by -1, we have to parse into a larger > + // integer. We choose `u64` as sufficiently large. > + // > + // NOTE: 128 bit integers are not available on all > + // platforms, hence the choice of 64 bits. > + let val = > + u64::from_str_radix(core::str::from_utf8(digits).map_err(|_| EINVAL)?, radix) > + .map_err(|_| EINVAL)?; > + > + if val > Self::abs_min() { > + return Err(EINVAL); > + } > + > + if val == Self::abs_min() { > + return Ok(Self::MIN); > + } > + > + // SAFETY: We checked that `val` will fit in `Self` above. Sorry that it took me this long to realize, but this seems pretty weird. I guess this is why the `FromStrRadix` is `unsafe`. Can we just move this part of the code to `FromStrRadix` and make that trait safe? So essentially have: fn from_u64(value: u64) -> Result<Self>; in `FromStrRadix` and remove `MIN`, `abs_min` and `complement`. Then implement it like this in the macro below: const ABS_MIN = /* existing abs_min impl */; if value > ABS_MIN { return Err(EINVAL); } if val == ABS_MIN { return Ok(<$ty>::MIN); } // SAFETY: We checked that `val` will fit in `Self` above. let val: $ty = unsafe { val.try_into().unwrap_unchecked() }; (!val).wrapping_add(1) The reason that this is fine and the above is "weird" is the following: The current version only has `Self: FromStrRadix` which gives it access to the following guarantee from the `unsafe` trait: /// The member functions of this trait must be implemented according to /// their documentation. 
/// /// [`&BStr`]: kernel::str::BStr This doesn't mention `TryFrom<u64>` and thus the comment "We checked that `val` will fit in `Self` above" doesn't really apply: how does checking with the bounds given in `FromStrRadix` make `TryFrom` return `Ok`? If we move this code into the implementation of `FromStrRadix`, then we are locally in a context where we *know* the concrete type of `Self` and can thus rely on "checking" being the correct thing for `TryFrom`. With this adjustment, I can give my RB, but please let me take a look before you send it again :) --- Cheers, Benno > + let val: Self = unsafe { val.try_into().unwrap_unchecked() }; > + > + Ok(val.complement()) > + } > + _ => { > + let (radix, digits) = strip_radix(src); > + Self::from_str_radix(digits, radix).map_err(|_| EINVAL) > + } > + } > + } > +}
"Benno Lossin" <lossin@kernel.org> writes: > On Thu Jun 12, 2025 at 3:40 PM CEST, Andreas Hindborg wrote: >> +pub trait ParseInt: private::FromStrRadix + TryFrom<u64> { >> + /// Parse a string according to the description in [`Self`]. >> + fn from_str(src: &BStr) -> Result<Self> { >> + match src.deref() { >> + [b'-', rest @ ..] => { >> + let (radix, digits) = strip_radix(rest.as_ref()); >> + // 2's complement values range from -2^(b-1) to 2^(b-1)-1. >> + // So if we want to parse negative numbers as positive and >> + // later multiply by -1, we have to parse into a larger >> + // integer. We choose `u64` as sufficiently large. >> + // >> + // NOTE: 128 bit integers are not available on all >> + // platforms, hence the choice of 64 bits. >> + let val = >> + u64::from_str_radix(core::str::from_utf8(digits).map_err(|_| EINVAL)?, radix) >> + .map_err(|_| EINVAL)?; >> + >> + if val > Self::abs_min() { >> + return Err(EINVAL); >> + } >> + >> + if val == Self::abs_min() { >> + return Ok(Self::MIN); >> + } >> + >> + // SAFETY: We checked that `val` will fit in `Self` above. > > Sorry that it took me this long to realize, but this seems pretty weird. > I guess this is why the `FromStrRadix` is `unsafe`. > > Can we just move this part of the code to `FromStrRadix` and make that > trait safe? > > So essentially have: > > fn from_u64(value: u64) -> Result<Self>; > > in `FromStrRadix` and remove `MIN`, `abs_min` and `complement`. Then > implement it like this in the macro below: > > const ABS_MIN = /* existing abs_min impl */; > if value > ABS_MIN { > return Err(EINVAL); > } > if val == ABS_MIN { > return Ok(<$ty>::MIN); > } > // SAFETY: We checked that `val` will fit in `Self` above. 
> let val: $ty = unsafe { val.try_into().unwrap_unchecked() }; > (!val).wrapping_add(1) > > The reason that this is fine and the above is "weird" is the following: > The current version only has `Self: FromStrRadix` which gives it access > to the following guarantee from the `unsafe` trait: > > /// The member functions of this trait must be implemented according to > /// their documentation. > /// > /// [`&BStr`]: kernel::str::BStr > > This doesn't mention `TryFrom<u64>` and thus the comment "We checked > that `val` will fit in `Self` above" doesn't really apply: how does > checking with the bounds given in `FromStrRadix` make `TryFrom` return > `Ok`? I'm having a difficult time parsing. Are you suggesting that we guard against implementations of `TryInto<u64>` that misbehave? Best regards, Andreas Hindborg
On Thu Jun 19, 2025 at 1:12 PM CEST, Andreas Hindborg wrote: > I'm having a difficult time parsing. Are you suggesting that we guard > against implementations of `TryInto<u64>` that misbehave? Let me try a different explanation: The safety requirement for implementing the `FromStrRadix`: /// The member functions of this trait must be implemented according to /// their documentation. Together with the functions of the trait: /// Parse `src` to [`Self`] using radix `radix`. fn from_str_radix(src: &BStr, radix: u32) -> Result<Self, crate::error::Error>; /// Return the absolute value of [`Self::MIN`]. fn abs_min() -> u64; /// Perform bitwise 2's complement on `self`. /// /// Note: This function does not make sense for unsigned integers. fn complement(self) -> Self; Doesn't make sense. What does it mean to return the "absolute value of [`Self::MIN`]"? We don't have "absolute value" defined for an arbitrary type. Similarly for `complement` and `from_str_radix`, what does "Parse `src` to [`Self`] using radix `radix`" mean? It's not well-defined. You use this safety requirement in the parsing branch for negative numbers (the `unsafe` call at the bottom): [b'-', rest @ ..] => { let (radix, digits) = strip_radix(rest.as_ref()); // 2's complement values range from -2^(b-1) to 2^(b-1)-1. // So if we want to parse negative numbers as positive and // later multiply by -1, we have to parse into a larger // integer. We choose `u64` as sufficiently large. // // NOTE: 128 bit integers are not available on all // platforms, hence the choice of 64 bits. let val = u64::from_str_radix(core::str::from_utf8(digits).map_err(|_| EINVAL)?, radix) .map_err(|_| EINVAL)?; if val > Self::abs_min() { return Err(EINVAL); } if val == Self::abs_min() { return Ok(Self::MIN); } // SAFETY: We checked that `val` will fit in `Self` above. 
let val: Self = unsafe { val.try_into().unwrap_unchecked() }; Ok(val.complement()) } But you don't mention that the check is valid due to the safety requirements of implementing `FromStrRadix`. But even if you did, that wouldn't mean anything as I explained above. So let's instead move all of this negation & u64 conversion logic into the `FromStrRadix` trait. Then it can be safe & the `ParseInt::from_str` function doesn't use `unsafe` (there still will be `unsafe` in the macro, but that is fine, as it's more local and knows the concrete types). --- Cheers, Benno Here is what I have in mind: diff --git a/rust/kernel/str/parse_int.rs b/rust/kernel/str/parse_int.rs index 0754490aec4b..9d6e146c5ea7 100644 --- a/rust/kernel/str/parse_int.rs +++ b/rust/kernel/str/parse_int.rs @@ -13,32 +13,16 @@ // `ParseInt`, that is, prevents downstream users from implementing the // trait. mod private { + use crate::prelude::*; use crate::str::BStr; /// Trait that allows parsing a [`&BStr`] to an integer with a radix. - /// - /// # Safety - /// - /// The member functions of this trait must be implemented according to - /// their documentation. - /// - /// [`&BStr`]: kernel::str::BStr - // This is required because the `from_str_radix` function on the primitive - // integer types is not part of any trait. - pub unsafe trait FromStrRadix: Sized { - /// The minimum value this integer type can assume. - const MIN: Self; - + pub trait FromStrRadix: Sized { /// Parse `src` to [`Self`] using radix `radix`. - fn from_str_radix(src: &BStr, radix: u32) -> Result<Self, crate::error::Error>; - - /// Return the absolute value of [`Self::MIN`]. - fn abs_min() -> u64; + fn from_str_radix(src: &BStr, radix: u32) -> Result<Self>; - /// Perform bitwise 2's complement on `self`. - /// - /// Note: This function does not make sense for unsigned integers. - fn complement(self) -> Self; + /// Tries to convert `value` into [`Self`] and negates the resulting value. 
+ fn from_u64_negated(value: u64) -> Result<Self>; } } @@ -108,19 +92,7 @@ fn from_str(src: &BStr) -> Result<Self> { let val = u64::from_str_radix(core::str::from_utf8(digits).map_err(|_| EINVAL)?, radix) .map_err(|_| EINVAL)?; - - if val > Self::abs_min() { - return Err(EINVAL); - } - - if val == Self::abs_min() { - return Ok(Self::MIN); - } - - // SAFETY: We checked that `val` will fit in `Self` above. - let val: Self = unsafe { val.try_into().unwrap_unchecked() }; - - Ok(val.complement()) + Self::from_u64_negated(val) } _ => { let (radix, digits) = strip_radix(src); @@ -131,41 +103,49 @@ fn from_str(src: &BStr) -> Result<Self> { } macro_rules! impl_parse_int { - ($ty:ty) => { - // SAFETY: We implement the trait according to the documentation. - unsafe impl private::FromStrRadix for $ty { - const MIN: Self = <$ty>::MIN; - - fn from_str_radix(src: &BStr, radix: u32) -> Result<Self, crate::error::Error> { - <$ty>::from_str_radix(core::str::from_utf8(src).map_err(|_| EINVAL)?, radix) - .map_err(|_| EINVAL) - } - - fn abs_min() -> u64 { - #[allow(unused_comparisons)] - if Self::MIN < 0 { - 1u64 << (Self::BITS - 1) - } else { - 0 + ($($ty:ty),*) => { + $( + impl private::FromStrRadix for $ty { + fn from_str_radix(src: &BStr, radix: u32) -> Result<Self> { + <$ty>::from_str_radix(core::str::from_utf8(src).map_err(|_| EINVAL)?, radix) + .map_err(|_| EINVAL) } - } - fn complement(self) -> Self { - (!self).wrapping_add((1 as $ty)) + fn from_u64_negated(value: u64) -> Result<Self> { + const ABS_MIN: u64 = { + #[allow(unused_comparisons)] + if <$ty>::MIN < 0 { + 1u64 << (Self::BITS - 1) + } else { + 0 + } + }; + + fn complement(self) -> Self { + (!self).wrapping_add((1 as $ty)) + } + if val > ABS_MIN { + return Err(EINVAL); + } + + if val == ABS_MIN { + return Ok(<$ty>::MIN); + } + + // SAFETY: The above checks guarantee that `val` fits into `Self`: + // - if `Self` is unsigned, then `ABS_MIN == 0` and thus we have returned above + // (either `EINVAL` or `MIN`). 
+ // - if `Self` is signed, then we have that `0 <= val < ABS_MIN`. And since + // `ABS_MIN - 1` fits into `Self` by construction, `val` also does. + let val: Self = unsafe { val.try_into().unwrap_unchecked() }; + + Ok((!val).wrapping_add(1)) + } } - } - impl ParseInt for $ty {} + impl ParseInt for $ty {} + )* }; } -impl_parse_int!(i8); -impl_parse_int!(u8); -impl_parse_int!(i16); -impl_parse_int!(u16); -impl_parse_int!(i32); -impl_parse_int!(u32); -impl_parse_int!(i64); -impl_parse_int!(u64); -impl_parse_int!(isize); -impl_parse_int!(usize); +impl_parse_int![i8, u8, i16, u16, i32, u32, i64, u64, isize, usize];
"Benno Lossin" <lossin@kernel.org> writes: > On Thu Jun 19, 2025 at 1:12 PM CEST, Andreas Hindborg wrote: >> I'm having a difficult time parsing. Are you suggesting that we guard >> against implementations of `TryInto<u64>` that misbehave? > > Let me try a different explanation: > > The safety requirement for implementing the `FromStrRadix`: > > /// The member functions of this trait must be implemented according to > /// their documentation. > > Together with the functions of the trait: > > /// Parse `src` to [`Self`] using radix `radix`. > fn from_str_radix(src: &BStr, radix: u32) -> Result<Self, crate::error::Error>; > > /// Return the absolute value of [`Self::MIN`]. > fn abs_min() -> u64; > > /// Perform bitwise 2's complement on `self`. > /// > /// Note: This function does not make sense for unsigned integers. > fn complement(self) -> Self; > > Doesn't make sense. What does it mean to return the "absolute value of > [`Self::MIN`]"? We don't have "absolute value" defined for an arbitrary > type. Similarly for `complement` and `from_str_radix`, what does "Parse > `src` to [`Self`] using radex `radix`" mean? It's not well-defined. > > You use this safety requirement in the parsing branch for negative > numbers (the `unsafe` call at the bottom): > > [b'-', rest @ ..] => { > let (radix, digits) = strip_radix(rest.as_ref()); > // 2's complement values range from -2^(b-1) to 2^(b-1)-1. > // So if we want to parse negative numbers as positive and > // later multiply by -1, we have to parse into a larger > // integer. We choose `u64` as sufficiently large. > // > // NOTE: 128 bit integers are not available on all > // platforms, hence the choice of 64 bits. > let val = > u64::from_str_radix(core::str::from_utf8(digits).map_err(|_| EINVAL)?, radix) > .map_err(|_| EINVAL)?; > > if val > Self::abs_min() { > return Err(EINVAL); > } > > if val == Self::abs_min() { > return Ok(Self::MIN); > } > > // SAFETY: We checked that `val` will fit in `Self` above. 
> let val: Self = unsafe { val.try_into().unwrap_unchecked() }; > > Ok(val.complement()) > } > > But you don't mention that the check is valid due to the safety > requirements of implementing `FromStrRadix`. But even if you did, that > wouldn't mean anything as I explained above. > > So let's instead move all of this negation & u64 conversion logic into > the `FromStrRadix` trait. Then it can be safe & the `ParseInt::from_str` > function doesn't use `unsafe` (there still will be `unsafe` in the > macro, but that is fine, as it's more local and knows the concrete > types). > Alright. I guess my safety comments are slightly hand-wavy. Thanks for the suggestion, I'll apply that for next spin. Best regards, Andreas Hindborg
© 2016 - 2025 Red Hat, Inc.