This type will be used when setting up a new vma in an f_ops->mmap()
hook. Using a separate type from VmAreaRef allows us to have a separate
set of operations that you are only able to use during the mmap() hook.
For example, the VM_MIXEDMAP flag must not be changed after the initial
setup that happens during the f_ops->mmap() hook.
To avoid setting invalid flag values, the methods for clearing
VM_MAYWRITE and similar involve a check of VM_WRITE, and return an error
if VM_WRITE is set. Trying to use `try_clear_maywrite` without checking
the return value results in a compilation error because the `Result`
type is marked #[must_use].
For now, there's only a method for VM_MIXEDMAP and not VM_PFNMAP. When
we add a VM_PFNMAP method, we will need some way to prevent you from
setting both VM_MIXEDMAP and VM_PFNMAP on the same vma.
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
---
rust/kernel/mm/virt.rs | 179 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 178 insertions(+), 1 deletion(-)
diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs
index 3e494e40b530..2a49c29a49c7 100644
--- a/rust/kernel/mm/virt.rs
+++ b/rust/kernel/mm/virt.rs
@@ -6,7 +6,7 @@
use crate::{
bindings,
- error::{to_result, Result},
+ error::{code::EINVAL, to_result, Result},
page::Page,
types::Opaque,
};
@@ -155,6 +155,183 @@ pub fn vm_insert_page(&self, address: usize, page: &Page) -> Result {
}
}
+/// A builder for setting up a vma in an `f_ops->mmap()` hook.
+///
+/// # Invariants
+///
+/// For the duration of 'a, the referenced vma must be undergoing initialization in an
+/// `f_ops->mmap()` hook.
+pub struct VmAreaNew {
+ vma: VmAreaRef,
+}
+
+// Make all `VmAreaRef` methods available on `VmAreaNew`.
+impl Deref for VmAreaNew {
+ type Target = VmAreaRef;
+
+ #[inline]
+ fn deref(&self) -> &VmAreaRef {
+ &self.vma
+ }
+}
+
+impl VmAreaNew {
+ /// Access a virtual memory area given a raw pointer.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that `vma` is undergoing initial vma setup for the duration of 'a.
+ #[inline]
+ pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self {
+ // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
+ unsafe { &*vma.cast() }
+ }
+
+ /// Internal method for updating the vma flags.
+ ///
+ /// # Safety
+ ///
+ /// This must not be used to set the flags to an invalid value.
+ #[inline]
+ unsafe fn update_flags(&self, set: vm_flags_t, unset: vm_flags_t) {
+ let mut flags = self.flags();
+ flags |= set;
+ flags &= !unset;
+
+ // SAFETY: This is not a data race: the vma is undergoing initial setup, so it's not yet
+ // shared. Additionally, `VmAreaNew` is `!Sync`, so it cannot be used to write in parallel.
+ // The caller promises that this does not set the flags to an invalid value.
+ unsafe { (*self.as_ptr()).__bindgen_anon_2.__vm_flags = flags };
+ }
+
+ /// Set the `VM_MIXEDMAP` flag on this vma.
+ ///
+ /// This enables the vma to contain both `struct page` and pure PFN pages. Returns a reference
+ /// that can be used to call `vm_insert_page` on the vma.
+ #[inline]
+ pub fn set_mixedmap(&self) -> &VmAreaMixedMap {
+ // SAFETY: We don't yet provide a way to set VM_PFNMAP, so this cannot put the flags in an
+ // invalid state.
+ unsafe { self.update_flags(flags::MIXEDMAP, 0) };
+
+ // SAFETY: We just set `VM_MIXEDMAP` on the vma.
+ unsafe { VmAreaMixedMap::from_raw(self.vma.as_ptr()) }
+ }
+
+ /// Set the `VM_IO` flag on this vma.
+ ///
+ /// This marks the vma as being a memory-mapped I/O region.
+ #[inline]
+ pub fn set_io(&self) {
+ // SAFETY: Setting the VM_IO flag is always okay.
+ unsafe { self.update_flags(flags::IO, 0) };
+ }
+
+ /// Set the `VM_DONTEXPAND` flag on this vma.
+ ///
+ /// This prevents the vma from being expanded with `mremap()`.
+ #[inline]
+ pub fn set_dontexpand(&self) {
+ // SAFETY: Setting the VM_DONTEXPAND flag is always okay.
+ unsafe { self.update_flags(flags::DONTEXPAND, 0) };
+ }
+
+ /// Set the `VM_DONTCOPY` flag on this vma.
+ ///
+ /// This prevents the vma from being copied on fork. This option is only permanent if `VM_IO`
+ /// is set.
+ #[inline]
+ pub fn set_dontcopy(&self) {
+ // SAFETY: Setting the VM_DONTCOPY flag is always okay.
+ unsafe { self.update_flags(flags::DONTCOPY, 0) };
+ }
+
+ /// Set the `VM_DONTDUMP` flag on this vma.
+ ///
+ /// This prevents the vma from being included in core dumps. This option is only permanent if
+ /// `VM_IO` is set.
+ #[inline]
+ pub fn set_dontdump(&self) {
+ // SAFETY: Setting the VM_DONTDUMP flag is always okay.
+ unsafe { self.update_flags(flags::DONTDUMP, 0) };
+ }
+
+ /// Returns whether `VM_READ` is set.
+ ///
+ /// This flag indicates whether userspace is mapping this vma as readable.
+ #[inline]
+ pub fn get_read(&self) -> bool {
+ (self.flags() & flags::READ) != 0
+ }
+
+ /// Try to clear the `VM_MAYREAD` flag, failing if `VM_READ` is set.
+ ///
+ /// This flag indicates whether userspace is allowed to make this vma readable with
+ /// `mprotect()`.
+ ///
+ /// Note that this operation is irreversible. Once `VM_MAYREAD` has been cleared, it can never
+ /// be set again.
+ #[inline]
+ pub fn try_clear_mayread(&self) -> Result {
+ if self.get_read() {
+ return Err(EINVAL);
+ }
+ // SAFETY: Clearing `VM_MAYREAD` is okay when `VM_READ` is not set.
+ unsafe { self.update_flags(0, flags::MAYREAD) };
+ Ok(())
+ }
+
+ /// Returns whether `VM_WRITE` is set.
+ ///
+ /// This flag indicates whether userspace is mapping this vma as writable.
+ #[inline]
+ pub fn get_write(&self) -> bool {
+ (self.flags() & flags::WRITE) != 0
+ }
+
+ /// Try to clear the `VM_MAYWRITE` flag, failing if `VM_WRITE` is set.
+ ///
+ /// This flag indicates whether userspace is allowed to make this vma writable with
+ /// `mprotect()`.
+ ///
+ /// Note that this operation is irreversible. Once `VM_MAYWRITE` has been cleared, it can never
+ /// be set again.
+ #[inline]
+ pub fn try_clear_maywrite(&self) -> Result {
+ if self.get_write() {
+ return Err(EINVAL);
+ }
+ // SAFETY: Clearing `VM_MAYWRITE` is okay when `VM_WRITE` is not set.
+ unsafe { self.update_flags(0, flags::MAYWRITE) };
+ Ok(())
+ }
+
+ /// Returns whether `VM_EXEC` is set.
+ ///
+ /// This flag indicates whether userspace is mapping this vma as executable.
+ #[inline]
+ pub fn get_exec(&self) -> bool {
+ (self.flags() & flags::EXEC) != 0
+ }
+
+ /// Try to clear the `VM_MAYEXEC` flag, failing if `VM_EXEC` is set.
+ ///
+ /// This flag indicates whether userspace is allowed to make this vma executable with
+ /// `mprotect()`.
+ ///
+ /// Note that this operation is irreversible. Once `VM_MAYEXEC` has been cleared, it can never
+ /// be set again.
+ #[inline]
+ pub fn try_clear_mayexec(&self) -> Result {
+ if self.get_exec() {
+ return Err(EINVAL);
+ }
+ // SAFETY: Clearing `VM_MAYEXEC` is okay when `VM_EXEC` is not set.
+ unsafe { self.update_flags(0, flags::MAYEXEC) };
+ Ok(())
+ }
+}
+
/// The integer type used for vma flags.
#[doc(inline)]
pub use bindings::vm_flags_t;
--
2.47.0.371.ga323438b13-goog
On Fri, Nov 22, 2024 at 4:41 PM Alice Ryhl <aliceryhl@google.com> wrote:
> This type will be used when setting up a new vma in an f_ops->mmap()
> hook. Using a separate type from VmAreaRef allows us to have a separate
> set of operations that you are only able to use during the mmap() hook.
> For example, the VM_MIXEDMAP flag must not be changed after the initial
> setup that happens during the f_ops->mmap() hook.
>
> To avoid setting invalid flag values, the methods for clearing
> VM_MAYWRITE and similar involve a check of VM_WRITE, and return an error
> if VM_WRITE is set. Trying to use `try_clear_maywrite` without checking
> the return value results in a compilation error because the `Result`
> type is marked #[must_use].
>
> For now, there's only a method for VM_MIXEDMAP and not VM_PFNMAP. When
> we add a VM_PFNMAP method, we will need some way to prevent you from
> setting both VM_MIXEDMAP and VM_PFNMAP on the same vma.
>
> Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Thanks, this looks really neat!
Reviewed-by: Jann Horn <jannh@google.com>
> + /// Set the `VM_IO` flag on this vma.
> + ///
> + /// This marks the vma as being a memory-mapped I/O region.
nit: VM_IO isn't really exclusively used for MMIO; the header comment
says "Memory mapped I/O or similar", while the comment in
remap_pfn_range_internal() says "VM_IO tells people not to look at
these pages (accesses can have side effects)". But I don't really have
a good definition of what VM_IO actually means; so I don't really have
a concrete suggestion for what to do here. So my comment isn't very
actionable, I guess it's fine to leave this as-is unless someone
actually has a good definition...
> + #[inline]
> + pub fn set_io(&self) {
> + // SAFETY: Setting the VM_IO flag is always okay.
> + unsafe { self.update_flags(flags::IO, 0) };
> + }
On Tue, Nov 26, 2024 at 10:30 PM Jann Horn <jannh@google.com> wrote:
>
> On Fri, Nov 22, 2024 at 4:41 PM Alice Ryhl <aliceryhl@google.com> wrote:
> > This type will be used when setting up a new vma in an f_ops->mmap()
> > hook. Using a separate type from VmAreaRef allows us to have a separate
> > set of operations that you are only able to use during the mmap() hook.
> > For example, the VM_MIXEDMAP flag must not be changed after the initial
> > setup that happens during the f_ops->mmap() hook.
> >
> > To avoid setting invalid flag values, the methods for clearing
> > VM_MAYWRITE and similar involve a check of VM_WRITE, and return an error
> > if VM_WRITE is set. Trying to use `try_clear_maywrite` without checking
> > the return value results in a compilation error because the `Result`
> > type is marked #[must_use].
> >
> > For now, there's only a method for VM_MIXEDMAP and not VM_PFNMAP. When
> > we add a VM_PFNMAP method, we will need some way to prevent you from
> > setting both VM_MIXEDMAP and VM_PFNMAP on the same vma.
> >
> > Signed-off-by: Alice Ryhl <aliceryhl@google.com>
>
> Thanks, this looks really neat!
>
> Reviewed-by: Jann Horn <jannh@google.com>
>
> > + /// Set the `VM_IO` flag on this vma.
> > + ///
> > + /// This marks the vma as being a memory-mapped I/O region.
>
> nit: VM_IO isn't really exclusively used for MMIO; the header comment
> says "Memory mapped I/O or similar", while the comment in
> remap_pfn_range_internal() says "VM_IO tells people not to look at
> these pages (accesses can have side effects)". But I don't really have
> a good definition of what VM_IO actually means; so I don't really have
> a concrete suggestion for what to do here. So my comment isn't very
> actionable, I guess it's fine to leave this as-is unless someone
> actually has a good definition...
I can use this comment?
This is used for memory mapped IO and similar. The flag tells other
parts of the kernel to not look at the pages. For memory mapped IO
this is useful as accesses to the pages could have side effects.
Alice
On Wed, Nov 27, 2024 at 1:38 PM Alice Ryhl <aliceryhl@google.com> wrote:
> On Tue, Nov 26, 2024 at 10:30 PM Jann Horn <jannh@google.com> wrote:
> > On Fri, Nov 22, 2024 at 4:41 PM Alice Ryhl <aliceryhl@google.com> wrote:
> > > This type will be used when setting up a new vma in an f_ops->mmap()
> > > hook. Using a separate type from VmAreaRef allows us to have a separate
> > > set of operations that you are only able to use during the mmap() hook.
> > > For example, the VM_MIXEDMAP flag must not be changed after the initial
> > > setup that happens during the f_ops->mmap() hook.
> > >
> > > To avoid setting invalid flag values, the methods for clearing
> > > VM_MAYWRITE and similar involve a check of VM_WRITE, and return an error
> > > if VM_WRITE is set. Trying to use `try_clear_maywrite` without checking
> > > the return value results in a compilation error because the `Result`
> > > type is marked #[must_use].
> > >
> > > For now, there's only a method for VM_MIXEDMAP and not VM_PFNMAP. When
> > > we add a VM_PFNMAP method, we will need some way to prevent you from
> > > setting both VM_MIXEDMAP and VM_PFNMAP on the same vma.
> > >
> > > Signed-off-by: Alice Ryhl <aliceryhl@google.com>
> >
> > Thanks, this looks really neat!
> >
> > Reviewed-by: Jann Horn <jannh@google.com>
> >
> > > + /// Set the `VM_IO` flag on this vma.
> > > + ///
> > > + /// This marks the vma as being a memory-mapped I/O region.
> >
> > nit: VM_IO isn't really exclusively used for MMIO; the header comment
> > says "Memory mapped I/O or similar", while the comment in
> > remap_pfn_range_internal() says "VM_IO tells people not to look at
> > these pages (accesses can have side effects)". But I don't really have
> > a good definition of what VM_IO actually means; so I don't really have
> > a concrete suggestion for what to do here. So my comment isn't very
> > actionable, I guess it's fine to leave this as-is unless someone
> > actually has a good definition...
>
> I can use this comment?
>
> This is used for memory mapped IO and similar. The flag tells other
> parts of the kernel to not look at the pages. For memory mapped IO
> this is useful as accesses to the pages could have side effects.
Yeah, sounds reasonable.
On Fri, Nov 22, 2024 at 03:40:31PM +0000, Alice Ryhl wrote:
> This type will be used when setting up a new vma in an f_ops->mmap()
> hook. Using a separate type from VmAreaRef allows us to have a separate
> set of operations that you are only able to use during the mmap() hook.
> For example, the VM_MIXEDMAP flag must not be changed after the initial
> setup that happens during the f_ops->mmap() hook.
Nice, thanks!
>
> To avoid setting invalid flag values, the methods for clearing
> VM_MAYWRITE and similar involve a check of VM_WRITE, and return an error
> if VM_WRITE is set. Trying to use `try_clear_maywrite` without checking
> the return value results in a compilation error because the `Result`
> type is marked #[must_use].
>
> For now, there's only a method for VM_MIXEDMAP and not VM_PFNMAP. When
> we add a VM_PFNMAP method, we will need some way to prevent you from
> setting both VM_MIXEDMAP and VM_PFNMAP on the same vma.
>
> Signed-off-by: Alice Ryhl <aliceryhl@google.com>
LGTM, so:
Acked-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> (for mm bits)
> ---
> rust/kernel/mm/virt.rs | 179 ++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 178 insertions(+), 1 deletion(-)
>
> diff --git a/rust/kernel/mm/virt.rs b/rust/kernel/mm/virt.rs
> index 3e494e40b530..2a49c29a49c7 100644
> --- a/rust/kernel/mm/virt.rs
> +++ b/rust/kernel/mm/virt.rs
> @@ -6,7 +6,7 @@
>
> use crate::{
> bindings,
> - error::{to_result, Result},
> + error::{code::EINVAL, to_result, Result},
> page::Page,
> types::Opaque,
> };
> @@ -155,6 +155,183 @@ pub fn vm_insert_page(&self, address: usize, page: &Page) -> Result {
> }
> }
>
> +/// A builder for setting up a vma in an `f_ops->mmap()` hook.
> +///
> +/// # Invariants
> +///
> +/// For the duration of 'a, the referenced vma must be undergoing initialization in an
> +/// `f_ops->mmap()` hook.
> +pub struct VmAreaNew {
> + vma: VmAreaRef,
> +}
> +
> +// Make all `VmAreaRef` methods available on `VmAreaNew`.
> +impl Deref for VmAreaNew {
> + type Target = VmAreaRef;
> +
> + #[inline]
> + fn deref(&self) -> &VmAreaRef {
> + &self.vma
> + }
> +}
> +
> +impl VmAreaNew {
> + /// Access a virtual memory area given a raw pointer.
> + ///
> + /// # Safety
> + ///
> + /// Callers must ensure that `vma` is undergoing initial vma setup for the duration of 'a.
> + #[inline]
> + pub unsafe fn from_raw<'a>(vma: *const bindings::vm_area_struct) -> &'a Self {
> + // SAFETY: The caller ensures that the invariants are satisfied for the duration of 'a.
> + unsafe { &*vma.cast() }
> + }
> +
> + /// Internal method for updating the vma flags.
> + ///
> + /// # Safety
> + ///
> + /// This must not be used to set the flags to an invalid value.
> + #[inline]
> + unsafe fn update_flags(&self, set: vm_flags_t, unset: vm_flags_t) {
> + let mut flags = self.flags();
> + flags |= set;
> + flags &= !unset;
> +
> + // SAFETY: This is not a data race: the vma is undergoing initial setup, so it's not yet
> + // shared. Additionally, `VmAreaNew` is `!Sync`, so it cannot be used to write in parallel.
> + // The caller promises that this does not set the flags to an invalid value.
> + unsafe { (*self.as_ptr()).__bindgen_anon_2.__vm_flags = flags };
Thanks.
> + }
> +
> + /// Set the `VM_MIXEDMAP` flag on this vma.
> + ///
> + /// This enables the vma to contain both `struct page` and pure PFN pages. Returns a reference
> + /// that can be used to call `vm_insert_page` on the vma.
> + #[inline]
> + pub fn set_mixedmap(&self) -> &VmAreaMixedMap {
> + // SAFETY: We don't yet provide a way to set VM_PFNMAP, so this cannot put the flags in an
> + // invalid state.
> + unsafe { self.update_flags(flags::MIXEDMAP, 0) };
> +
> + // SAFETY: We just set `VM_MIXEDMAP` on the vma.
> + unsafe { VmAreaMixedMap::from_raw(self.vma.as_ptr()) }
> + }
> +
> + /// Set the `VM_IO` flag on this vma.
> + ///
> + /// This marks the vma as being a memory-mapped I/O region.
> + #[inline]
> + pub fn set_io(&self) {
> + // SAFETY: Setting the VM_IO flag is always okay.
> + unsafe { self.update_flags(flags::IO, 0) };
> + }
> +
> + /// Set the `VM_DONTEXPAND` flag on this vma.
> + ///
> + /// This prevents the vma from being expanded with `mremap()`.
> + #[inline]
> + pub fn set_dontexpand(&self) {
> + // SAFETY: Setting the VM_DONTEXPAND flag is always okay.
> + unsafe { self.update_flags(flags::DONTEXPAND, 0) };
> + }
> +
> + /// Set the `VM_DONTCOPY` flag on this vma.
> + ///
> + /// This prevents the vma from being copied on fork. This option is only permanent if `VM_IO`
> + /// is set.
> + #[inline]
> + pub fn set_dontcopy(&self) {
> + // SAFETY: Setting the VM_DONTCOPY flag is always okay.
> + unsafe { self.update_flags(flags::DONTCOPY, 0) };
> + }
> +
> + /// Set the `VM_DONTDUMP` flag on this vma.
> + ///
> + /// This prevents the vma from being included in core dumps. This option is only permanent if
> + /// `VM_IO` is set.
> + #[inline]
> + pub fn set_dontdump(&self) {
> + // SAFETY: Setting the VM_DONTDUMP flag is always okay.
> + unsafe { self.update_flags(flags::DONTDUMP, 0) };
> + }
> +
> + /// Returns whether `VM_READ` is set.
> + ///
> + /// This flag indicates whether userspace is mapping this vma as readable.
> + #[inline]
> + pub fn get_read(&self) -> bool {
> + (self.flags() & flags::READ) != 0
> + }
> +
> + /// Try to clear the `VM_MAYREAD` flag, failing if `VM_READ` is set.
> + ///
> + /// This flag indicates whether userspace is allowed to make this vma readable with
> + /// `mprotect()`.
> + ///
> + /// Note that this operation is irreversible. Once `VM_MAYREAD` has been cleared, it can never
> + /// be set again.
> + #[inline]
> + pub fn try_clear_mayread(&self) -> Result {
> + if self.get_read() {
> + return Err(EINVAL);
> + }
> + // SAFETY: Clearing `VM_MAYREAD` is okay when `VM_READ` is not set.
> + unsafe { self.update_flags(0, flags::MAYREAD) };
> + Ok(())
> + }
> +
> + /// Returns whether `VM_WRITE` is set.
> + ///
> + /// This flag indicates whether userspace is mapping this vma as writable.
> + #[inline]
> + pub fn get_write(&self) -> bool {
> + (self.flags() & flags::WRITE) != 0
> + }
> +
> + /// Try to clear the `VM_MAYWRITE` flag, failing if `VM_WRITE` is set.
> + ///
> + /// This flag indicates whether userspace is allowed to make this vma writable with
> + /// `mprotect()`.
> + ///
> + /// Note that this operation is irreversible. Once `VM_MAYWRITE` has been cleared, it can never
> + /// be set again.
> + #[inline]
> + pub fn try_clear_maywrite(&self) -> Result {
> + if self.get_write() {
> + return Err(EINVAL);
> + }
> + // SAFETY: Clearing `VM_MAYWRITE` is okay when `VM_WRITE` is not set.
> + unsafe { self.update_flags(0, flags::MAYWRITE) };
> + Ok(())
> + }
> +
> + /// Returns whether `VM_EXEC` is set.
> + ///
> + /// This flag indicates whether userspace is mapping this vma as executable.
> + #[inline]
> + pub fn get_exec(&self) -> bool {
> + (self.flags() & flags::EXEC) != 0
> + }
> +
> + /// Try to clear the `VM_MAYEXEC` flag, failing if `VM_EXEC` is set.
> + ///
> + /// This flag indicates whether userspace is allowed to make this vma executable with
> + /// `mprotect()`.
> + ///
> + /// Note that this operation is irreversible. Once `VM_MAYEXEC` has been cleared, it can never
> + /// be set again.
> + #[inline]
> + pub fn try_clear_mayexec(&self) -> Result {
> + if self.get_exec() {
> + return Err(EINVAL);
> + }
> + // SAFETY: Clearing `VM_MAYEXEC` is okay when `VM_EXEC` is not set.
> + unsafe { self.update_flags(0, flags::MAYEXEC) };
> + Ok(())
> + }
> +}
> +
> /// The integer type used for vma flags.
> #[doc(inline)]
> pub use bindings::vm_flags_t;
>
> --
> 2.47.0.371.ga323438b13-goog
>
© 2016 - 2026 Red Hat, Inc.