Add a new PTE marker that results in any access causing the accessing
process to segfault.
This is preferable to PTE_MARKER_POISONED, which results in the same
handling as hardware poisoned memory, and is thus undesirable for cases
where we simply wish to 'soft' poison a range.
This is in preparation for implementing the ability to specify guard pages
at the page table level, i.e. ranges that, when accessed, should cause
process termination.
Additionally, rename zap_drop_file_uffd_wp() to zap_drop_markers() - the
function checks the ZAP_FLAG_DROP_MARKER flag so naming it for this single
purpose was simply incorrect.
We then reuse the same logic to determine whether a zap should clear a
guard entry - this should only be performed on teardown and never on
MADV_DONTNEED or MADV_FREE.
We additionally add a WARN_ON_ONCE() in hugetlb logic should a guard marker
be encountered there, as we explicitly do not support this operation and
this should not occur.
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
include/linux/mm_inline.h | 2 +-
include/linux/swapops.h | 24 +++++++++++++++++++++++-
mm/hugetlb.c | 4 ++++
mm/memory.c | 18 +++++++++++++++---
mm/mprotect.c | 6 ++++--
5 files changed, 47 insertions(+), 7 deletions(-)
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 355cf46a01a6..1b6a917fffa4 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -544,7 +544,7 @@ static inline pte_marker copy_pte_marker(
{
pte_marker srcm = pte_marker_get(entry);
/* Always copy error entries. */
- pte_marker dstm = srcm & PTE_MARKER_POISONED;
+ pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD);
/* Only copy PTE markers if UFFD register matches. */
if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma))
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index cb468e418ea1..96f26e29fefe 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -426,9 +426,19 @@ typedef unsigned long pte_marker;
* "Poisoned" here is meant in the very general sense of "future accesses are
* invalid", instead of referring very specifically to hardware memory errors.
* This marker is meant to represent any of various different causes of this.
+ *
+ * Note that, when encountered by the faulting logic, PTEs with this marker will
+ * result in VM_FAULT_HWPOISON and thus regardless trigger hardware memory error
+ * logic.
*/
#define PTE_MARKER_POISONED BIT(1)
-#define PTE_MARKER_MASK (BIT(2) - 1)
+/*
+ * Indicates that, on fault, this PTE will cause a SIGSEGV signal to be
+ * sent. This means guard markers behave in effect as if the region were mapped
+ * PROT_NONE, rather than as if they were a memory hole or equivalent.
+ */
+#define PTE_MARKER_GUARD BIT(2)
+#define PTE_MARKER_MASK (BIT(3) - 1)
static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
{
@@ -464,6 +474,18 @@ static inline int is_poisoned_swp_entry(swp_entry_t entry)
{
return is_pte_marker_entry(entry) &&
(pte_marker_get(entry) & PTE_MARKER_POISONED);
+
+}
+
+static inline swp_entry_t make_guard_swp_entry(void)
+{
+ return make_pte_marker_entry(PTE_MARKER_GUARD);
+}
+
+static inline int is_guard_swp_entry(swp_entry_t entry)
+{
+ return is_pte_marker_entry(entry) &&
+ (pte_marker_get(entry) & PTE_MARKER_GUARD);
}
/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 906294ac85dc..2c8c5da0f5d3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6353,6 +6353,10 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
ret = VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
goto out_mutex;
+ } else if (WARN_ON_ONCE(marker & PTE_MARKER_GUARD)) {
+ /* This isn't supported in hugetlb. */
+ ret = VM_FAULT_SIGSEGV;
+ goto out_mutex;
}
}
diff --git a/mm/memory.c b/mm/memory.c
index 0f614523b9f4..551455cd453f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1455,7 +1455,7 @@ static inline bool should_zap_folio(struct zap_details *details,
return !folio_test_anon(folio);
}
-static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
+static inline bool zap_drop_markers(struct zap_details *details)
{
if (!details)
return false;
@@ -1476,7 +1476,7 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
if (vma_is_anonymous(vma))
return;
- if (zap_drop_file_uffd_wp(details))
+ if (zap_drop_markers(details))
return;
for (;;) {
@@ -1671,7 +1671,15 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
* drop the marker if explicitly requested.
*/
if (!vma_is_anonymous(vma) &&
- !zap_drop_file_uffd_wp(details))
+ !zap_drop_markers(details))
+ continue;
+ } else if (is_guard_swp_entry(entry)) {
+ /*
+ * Ordinary zapping should not remove guard PTE
+ * markers. Only do so if we should remove PTE markers
+ * in general.
+ */
+ if (!zap_drop_markers(details))
continue;
} else if (is_hwpoison_entry(entry) ||
is_poisoned_swp_entry(entry)) {
@@ -4003,6 +4011,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
if (marker & PTE_MARKER_POISONED)
return VM_FAULT_HWPOISON;
+ /* Hitting a guard page is always a fatal condition. */
+ if (marker & PTE_MARKER_GUARD)
+ return VM_FAULT_SIGSEGV;
+
if (pte_marker_entry_uffd_wp(entry))
return pte_marker_handle_uffd_wp(vmf);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 0c5d6d06107d..1f671b0667bd 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -236,9 +236,11 @@ static long change_pte_range(struct mmu_gather *tlb,
} else if (is_pte_marker_entry(entry)) {
/*
* Ignore error swap entries unconditionally,
- * because any access should sigbus anyway.
+ * because any access should sigbus/sigsegv
+ * anyway.
*/
- if (is_poisoned_swp_entry(entry))
+ if (is_poisoned_swp_entry(entry) ||
+ is_guard_swp_entry(entry))
continue;
/*
* If this is uffd-wp pte marker and we'd like
--
2.47.0
On Wed Oct 23, 2024 at 7:24 PM EEST, Lorenzo Stoakes wrote: > Add a new PTE marker that results in any access causing the accessing > process to segfault. > > This is preferable to PTE_MARKER_POISONED, which results in the same > handling as hardware poisoned memory, and is thus undesirable for cases > where we simply wish to 'soft' poison a range. > > This is in preparation for implementing the ability to specify guard pages > at the page table level, i.e. ranges that, when accessed, should cause > process termination. > > Additionally, rename zap_drop_file_uffd_wp() to zap_drop_markers() - the > function checks the ZAP_FLAG_DROP_MARKER flag so naming it for this single > purpose was simply incorrect. > > We then reuse the same logic to determine whether a zap should clear a > guard entry - this should only be performed on teardown and never on > MADV_DONTNEED or MADV_FREE. > > We additionally add a WARN_ON_ONCE() in hugetlb logic should a guard marker > be encountered there, as we explicitly do not support this operation and > this should not occur. > > Acked-by: Vlastimil Babka <vbabkba@suse.cz> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > --- > include/linux/mm_inline.h | 2 +- > include/linux/swapops.h | 24 +++++++++++++++++++++++- > mm/hugetlb.c | 4 ++++ > mm/memory.c | 18 +++++++++++++++--- > mm/mprotect.c | 6 ++++-- > 5 files changed, 47 insertions(+), 7 deletions(-) > > diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h > index 355cf46a01a6..1b6a917fffa4 100644 > --- a/include/linux/mm_inline.h > +++ b/include/linux/mm_inline.h > @@ -544,7 +544,7 @@ static inline pte_marker copy_pte_marker( > { > pte_marker srcm = pte_marker_get(entry); > /* Always copy error entries. */ > - pte_marker dstm = srcm & PTE_MARKER_POISONED; > + pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD); > > /* Only copy PTE markers if UFFD register matches. 
*/ > if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma)) > diff --git a/include/linux/swapops.h b/include/linux/swapops.h > index cb468e418ea1..96f26e29fefe 100644 > --- a/include/linux/swapops.h > +++ b/include/linux/swapops.h > @@ -426,9 +426,19 @@ typedef unsigned long pte_marker; > * "Poisoned" here is meant in the very general sense of "future accesses are > * invalid", instead of referring very specifically to hardware memory errors. > * This marker is meant to represent any of various different causes of this. > + * > + * Note that, when encountered by the faulting logic, PTEs with this marker will > + * result in VM_FAULT_HWPOISON and thus regardless trigger hardware memory error > + * logic. > */ > #define PTE_MARKER_POISONED BIT(1) > -#define PTE_MARKER_MASK (BIT(2) - 1) > +/* > + * Indicates that, on fault, this PTE will case a SIGSEGV signal to be > + * sent. This means guard markers behave in effect as if the region were mapped > + * PROT_NONE, rather than if they were a memory hole or equivalent. 
> + */ > +#define PTE_MARKER_GUARD BIT(2) > +#define PTE_MARKER_MASK (BIT(3) - 1) > > static inline swp_entry_t make_pte_marker_entry(pte_marker marker) > { > @@ -464,6 +474,18 @@ static inline int is_poisoned_swp_entry(swp_entry_t entry) > { > return is_pte_marker_entry(entry) && > (pte_marker_get(entry) & PTE_MARKER_POISONED); > + > +} > + > +static inline swp_entry_t make_guard_swp_entry(void) > +{ > + return make_pte_marker_entry(PTE_MARKER_GUARD); > +} > + > +static inline int is_guard_swp_entry(swp_entry_t entry) > +{ > + return is_pte_marker_entry(entry) && > + (pte_marker_get(entry) & PTE_MARKER_GUARD); > } > > /* > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 906294ac85dc..2c8c5da0f5d3 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -6353,6 +6353,10 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, > ret = VM_FAULT_HWPOISON_LARGE | > VM_FAULT_SET_HINDEX(hstate_index(h)); > goto out_mutex; > + } else if (WARN_ON_ONCE(marker & PTE_MARKER_GUARD)) { > + /* This isn't supported in hugetlb. */ > + ret = VM_FAULT_SIGSEGV; > + goto out_mutex; > } > } > > diff --git a/mm/memory.c b/mm/memory.c > index 0f614523b9f4..551455cd453f 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -1455,7 +1455,7 @@ static inline bool should_zap_folio(struct zap_details *details, > return !folio_test_anon(folio); > } > > -static inline bool zap_drop_file_uffd_wp(struct zap_details *details) > +static inline bool zap_drop_markers(struct zap_details *details) > { > if (!details) > return false; > @@ -1476,7 +1476,7 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma, > if (vma_is_anonymous(vma)) > return; > > - if (zap_drop_file_uffd_wp(details)) > + if (zap_drop_markers(details)) > return; > > for (;;) { > @@ -1671,7 +1671,15 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, > * drop the marker if explicitly requested. 
> */ > if (!vma_is_anonymous(vma) && > - !zap_drop_file_uffd_wp(details)) > + !zap_drop_markers(details)) > + continue; > + } else if (is_guard_swp_entry(entry)) { > + /* > + * Ordinary zapping should not remove guard PTE > + * markers. Only do so if we should remove PTE markers > + * in general. > + */ > + if (!zap_drop_markers(details)) > continue; > } else if (is_hwpoison_entry(entry) || > is_poisoned_swp_entry(entry)) { > @@ -4003,6 +4011,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf) > if (marker & PTE_MARKER_POISONED) > return VM_FAULT_HWPOISON; > > + /* Hitting a guard page is always a fatal condition. */ > + if (marker & PTE_MARKER_GUARD) > + return VM_FAULT_SIGSEGV; > + > if (pte_marker_entry_uffd_wp(entry)) > return pte_marker_handle_uffd_wp(vmf); > > diff --git a/mm/mprotect.c b/mm/mprotect.c > index 0c5d6d06107d..1f671b0667bd 100644 > --- a/mm/mprotect.c > +++ b/mm/mprotect.c > @@ -236,9 +236,11 @@ static long change_pte_range(struct mmu_gather *tlb, > } else if (is_pte_marker_entry(entry)) { > /* > * Ignore error swap entries unconditionally, > - * because any access should sigbus anyway. > + * because any access should sigbus/sigsegv > + * anyway. > */ > - if (is_poisoned_swp_entry(entry)) > + if (is_poisoned_swp_entry(entry) || > + is_guard_swp_entry(entry)) > continue; > /* > * If this is uffd-wp pte marker and we'd like Acked-by: Jarkko Sakkinen <jarkko@kernel.org> BR, Jarkko
© 2016 - 2024 Red Hat, Inc.