[PATCH 15/16] fs/proc: update vmcore to use .proc_mmap_[prepare, complete]

Lorenzo Stoakes posted 16 patches 1 day, 14 hours ago
[PATCH 15/16] fs/proc: update vmcore to use .proc_mmap_[prepare, complete]
Posted by Lorenzo Stoakes 1 day, 13 hours ago
Now that we are able to use mmap_prepare, complete callbacks for procfs
implementations, update the vmcore implementation accordingly.

As part of this change, we must also update remap_vmalloc_range_partial()
to optionally not update VMA flags. Other than the remap_vmalloc_range()
wrapper, vmcore is the only user of this function so we can simply go ahead
and add a parameter.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 arch/s390/kernel/crash_dump.c |  6 ++--
 fs/proc/vmcore.c              | 53 +++++++++++++++++++++++++----------
 include/linux/vmalloc.h       | 10 +++----
 mm/vmalloc.c                  | 16 +++++++++--
 4 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index d4839de8ce9d..44d7902f7e41 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -186,7 +186,7 @@ static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
 
 	if (pfn < oldmem_data.size >> PAGE_SHIFT) {
 		size_old = min(size, oldmem_data.size - (pfn << PAGE_SHIFT));
-		rc = remap_pfn_range(vma, from,
+		rc = remap_pfn_range_complete(vma, from,
 				     pfn + (oldmem_data.start >> PAGE_SHIFT),
 				     size_old, prot);
 		if (rc || size == size_old)
@@ -195,7 +195,7 @@ static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
 		from += size_old;
 		pfn += size_old >> PAGE_SHIFT;
 	}
-	return remap_pfn_range(vma, from, pfn, size, prot);
+	return remap_pfn_range_complete(vma, from, pfn, size, prot);
 }
 
 /*
@@ -220,7 +220,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
 		from += size_hsa;
 		pfn += size_hsa >> PAGE_SHIFT;
 	}
-	return remap_pfn_range(vma, from, pfn, size, prot);
+	return remap_pfn_range_complete(vma, from, pfn, size, prot);
 }
 
 /*
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index f188bd900eb2..5e4e19c38d5e 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -254,7 +254,7 @@ int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
 				  unsigned long size, pgprot_t prot)
 {
 	prot = pgprot_encrypted(prot);
-	return remap_pfn_range(vma, from, pfn, size, prot);
+	return remap_pfn_range_complete(vma, from, pfn, size, prot);
 }
 
 /*
@@ -308,7 +308,7 @@ static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
 			tsz = min(offset + (u64)dump->size - start, (u64)size);
 			buf = dump->buf + start - offset;
 			if (remap_vmalloc_range_partial(vma, dst, buf, 0,
-							tsz))
+							tsz, /* set_vma= */false))
 				return -EFAULT;
 
 			size -= tsz;
@@ -588,24 +588,40 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
 	return ret;
 }
 
-static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
+static int mmap_prepare_vmcore(struct vm_area_desc *desc)
 {
-	size_t size = vma->vm_end - vma->vm_start;
-	u64 start, end, len, tsz;
-	struct vmcore_range *m;
+	size_t size = vma_desc_size(desc);
+	u64 start, end;
 
-	start = (u64)vma->vm_pgoff << PAGE_SHIFT;
+	start = (u64)desc->pgoff << PAGE_SHIFT;
 	end = start + size;
 
 	if (size > vmcore_size || end > vmcore_size)
 		return -EINVAL;
 
-	if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+	if (desc->vm_flags & (VM_WRITE | VM_EXEC))
 		return -EPERM;
 
-	vm_flags_mod(vma, VM_MIXEDMAP, VM_MAYWRITE | VM_MAYEXEC);
-	vma->vm_ops = &vmcore_mmap_ops;
+	desc->vm_flags |= VM_MIXEDMAP | VM_REMAP_FLAGS;
+	desc->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+	desc->vm_ops = &vmcore_mmap_ops;
+
+	/*
+	 * No need for remap_pfn_range_prepare() as we ensure non-CoW by
+	 * clearing VM_MAYWRITE.
+	 */
+
+	return 0;
+}
+
+static int mmap_complete_vmcore(struct file *file, struct vm_area_struct *vma,
+	const void *context)
+{
+	size_t size = vma->vm_end - vma->vm_start;
+	u64 start, len, tsz;
+	struct vmcore_range *m;
 
+	start = (u64)vma->vm_pgoff << PAGE_SHIFT;
 	len = 0;
 
 	if (start < elfcorebuf_sz) {
@@ -613,8 +629,8 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 
 		tsz = min(elfcorebuf_sz - (size_t)start, size);
 		pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT;
-		if (remap_pfn_range(vma, vma->vm_start, pfn, tsz,
-				    vma->vm_page_prot))
+		if (remap_pfn_range_complete(vma, vma->vm_start, pfn, tsz,
+					     vma->vm_page_prot))
 			return -EAGAIN;
 		size -= tsz;
 		start += tsz;
@@ -664,7 +680,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 		tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
 		kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz;
 		if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
-						kaddr, 0, tsz))
+				kaddr, 0, tsz, /* set_vma =*/false))
 			goto fail;
 
 		size -= tsz;
@@ -701,7 +717,13 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 	return -EAGAIN;
 }
 #else
-static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
+static int mmap_prepare_vmcore(struct vm_area_desc *desc)
+{
+	return -ENOSYS;
+}
+
+static int mmap_complete_vmcore(struct file *file, struct vm_area_struct *vma,
+		const void *context)
 {
 	return -ENOSYS;
 }
@@ -712,7 +734,8 @@ static const struct proc_ops vmcore_proc_ops = {
 	.proc_release	= release_vmcore,
 	.proc_read_iter	= read_vmcore,
 	.proc_lseek	= default_llseek,
-	.proc_mmap	= mmap_vmcore,
+	.proc_mmap_prepare = mmap_prepare_vmcore,
+	.proc_mmap_complete = mmap_complete_vmcore,
 };
 
 static u64 get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index eb54b7b3202f..588810e571aa 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -215,12 +215,12 @@ extern void *vmap(struct page **pages, unsigned int count,
 void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot);
 extern void vunmap(const void *addr);
 
-extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
-				       unsigned long uaddr, void *kaddr,
-				       unsigned long pgoff, unsigned long size);
+int remap_vmalloc_range_partial(struct vm_area_struct *vma,
+		unsigned long uaddr, void *kaddr, unsigned long pgoff,
+		unsigned long size, bool set_vma);
 
-extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
-							unsigned long pgoff);
+int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
+		unsigned long pgoff);
 
 int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot,
 		     struct page **pages, unsigned int page_shift);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4249e1e01947..877b557b2482 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4528,6 +4528,7 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
  * @kaddr:		virtual address of vmalloc kernel memory
  * @pgoff:		offset from @kaddr to start at
  * @size:		size of map area
+ * @set_vma:		If true, update VMA flags
  *
  * Returns:	0 for success, -Exxx on failure
  *
@@ -4540,7 +4541,7 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
  */
 int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
 				void *kaddr, unsigned long pgoff,
-				unsigned long size)
+				unsigned long size, bool set_vma)
 {
 	struct vm_struct *area;
 	unsigned long off;
@@ -4566,6 +4567,10 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
 		return -EINVAL;
 	kaddr += off;
 
+	/* If we shouldn't modify VMA flags, vm_insert_page() mustn't. */
+	if (!set_vma && !(vma->vm_flags & VM_MIXEDMAP))
+		return -EINVAL;
+
 	do {
 		struct page *page = vmalloc_to_page(kaddr);
 		int ret;
@@ -4579,7 +4584,11 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
 		size -= PAGE_SIZE;
 	} while (size > 0);
 
-	vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
+	if (set_vma)
+		vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
+	else
+		VM_WARN_ON_ONCE((vma->vm_flags & (VM_DONTEXPAND | VM_DONTDUMP)) !=
+				(VM_DONTEXPAND | VM_DONTDUMP));
 
 	return 0;
 }
@@ -4603,7 +4612,8 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 {
 	return remap_vmalloc_range_partial(vma, vma->vm_start,
 					   addr, pgoff,
-					   vma->vm_end - vma->vm_start);
+					   vma->vm_end - vma->vm_start,
+					   /* set_vma= */ true);
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
-- 
2.51.0