[PATCH v3 3/7] mm/vmalloc.c: allow vread() to read out vm_map_ram areas

Currently, vread() can read out vmalloc areas which are associated with
a vm_struct. However, this doesn't work for areas created by the
vm_map_ram() interface, because they have no associated vm_struct, so
vread() skips them entirely.
Here, add a new function vmap_ram_vread() to read out vm_map_ram areas.
An area created directly with the vm_map_ram() interface can be handled
like any other vmap area, via aligned_vread(). Areas that are further
subdivided and managed through a vmap_block, however, need to be read
out carefully as page-aligned small regions, with the holes between
regions zero-filled.
Signed-off-by: Baoquan He <bhe@redhat.com>
---
mm/vmalloc.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 73 insertions(+), 7 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ab4825050b5c..13875bc41e27 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3544,6 +3544,65 @@ static int aligned_vread(char *buf, char *addr, unsigned long count)
         return copied;
 }
 
+static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags)
+{
+        char *start;
+        struct vmap_block *vb;
+        unsigned long offset;
+        unsigned int rs, re, n;
+
+        /*
+         * If it's an area created by the vm_map_ram() interface directly,
+         * but not further subdivided and delegated to a vmap_block,
+         * handle it here.
+         */
+        if (!(flags & VMAP_BLOCK)) {
+                aligned_vread(buf, addr, count);
+                return;
+        }
+
+        /*
+         * Area is split into regions and tracked with vmap_block, read out
+         * each region and zero fill the hole between regions.
+         */
+        vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
+
+        spin_lock(&vb->lock);
+        if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
+                spin_unlock(&vb->lock);
+                memset(buf, 0, count);
+                return;
+        }
+        for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) {
+                if (!count)
+                        break;
+                start = vmap_block_vaddr(vb->va->va_start, rs);
+                while (addr < start) {
+                        if (count == 0)
+                                break;
+                        *buf = '\0';
+                        buf++;
+                        addr++;
+                        count--;
+                }
+                /* it could start reading from the middle of a used region */
+                offset = offset_in_page(addr);
+                n = ((re - rs + 1) << PAGE_SHIFT) - offset;
+                if (n > count)
+                        n = count;
+                aligned_vread(buf, start + offset, n);
+
+                buf += n;
+                addr += n;
+                count -= n;
+        }
+        spin_unlock(&vb->lock);
+
+        /* zero-fill the remaining dirty or free regions */
+        if (count)
+                memset(buf, 0, count);
+}
+
 /**
  * vread() - read vmalloc area in a safe way.
  * @buf: buffer for reading data
@@ -3574,7 +3633,7 @@ long vread(char *buf, char *addr, unsigned long count)
         struct vm_struct *vm;
         char *vaddr, *buf_start = buf;
         unsigned long buflen = count;
-        unsigned long n;
+        unsigned long n, size, flags;
 
         addr = kasan_reset_tag(addr);
 
@@ -3595,12 +3654,16 @@ long vread(char *buf, char *addr, unsigned long count)
                 if (!count)
                         break;
 
-                if (!va->vm)
+                vm = va->vm;
+                flags = va->flags & VMAP_FLAGS_MASK;
+
+                if (!vm && !flags)
                         continue;
 
-                vm = va->vm;
-                vaddr = (char *) vm->addr;
-                if (addr >= vaddr + get_vm_area_size(vm))
+                vaddr = (char *) va->va_start;
+                size = vm ? get_vm_area_size(vm) : va_size(va);
+
+                if (addr >= vaddr + size)
                         continue;
                 while (addr < vaddr) {
                         if (count == 0)
@@ -3610,10 +3673,13 @@ long vread(char *buf, char *addr, unsigned long count)
                         addr++;
                         count--;
                 }
-                n = vaddr + get_vm_area_size(vm) - addr;
+                n = vaddr + size - addr;
                 if (n > count)
                         n = count;
-                if (!(vm->flags & VM_IOREMAP))
+
+                if (flags & VMAP_RAM)
+                        vmap_ram_vread(buf, addr, n, flags);
+                else if (!(vm->flags & VM_IOREMAP))
                         aligned_vread(buf, addr, n);
                 else    /* IOREMAP area is treated as memory hole */
                         memset(buf, 0, n);
--
2.34.1
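
[ Aside, not part of the patch: one way to exercise the new code path is
a small test module along the following lines. Everything here is a
made-up sketch for illustration, not code from this series; it creates a
two-page vm_map_ram area (a vmap_area with VMAP_RAM set but no
vm_struct), fills it with a pattern, and with this series applied,
reading /proc/kcore at the printed address should return the pattern
instead of zeroes. ]

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/numa.h>
#include <linux/vmalloc.h>

#define VMR_TEST_PAGES  2

static struct page *vmr_pages[VMR_TEST_PAGES];
static void *vmr_va;

static int __init vmr_test_init(void)
{
        int i;

        for (i = 0; i < VMR_TEST_PAGES; i++) {
                vmr_pages[i] = alloc_page(GFP_KERNEL);
                if (!vmr_pages[i])
                        goto free_pages;
        }

        /* No vm_struct is created for this mapping, only a vmap_area. */
        vmr_va = vm_map_ram(vmr_pages, VMR_TEST_PAGES, NUMA_NO_NODE);
        if (!vmr_va)
                goto free_pages;

        memset(vmr_va, 0xaa, VMR_TEST_PAGES * PAGE_SIZE);
        pr_info("vm_map_ram test area at %px\n", vmr_va);
        return 0;

free_pages:
        while (--i >= 0)
                __free_page(vmr_pages[i]);
        return -ENOMEM;
}

static void __exit vmr_test_exit(void)
{
        int i;

        vm_unmap_ram(vmr_va, VMR_TEST_PAGES);
        for (i = 0; i < VMR_TEST_PAGES; i++)
                __free_page(vmr_pages[i]);
}

module_init(vmr_test_init);
module_exit(vmr_test_exit);
MODULE_LICENSE("GPL");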
On Fri, Jan 13, 2023 at 11:19:17AM +0800, Baoquan He wrote:
> +        spin_lock(&vb->lock);
> +        if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
> +                spin_unlock(&vb->lock);
> +                memset(buf, 0, count);
> +                return;
> +        }
> +        for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) {
> +                if (!count)
> +                        break;
> +                start = vmap_block_vaddr(vb->va->va_start, rs);
> +                while (addr < start) {
> +                        if (count == 0)
> +                                break;
> +                        *buf = '\0';
> +                        buf++;
> +                        addr++;
> +                        count--;
> +                }
> +                /* it could start reading from the middle of a used region */
> +                offset = offset_in_page(addr);
> +                n = ((re - rs + 1) << PAGE_SHIFT) - offset;
> +                if (n > count)
> +                        n = count;
> +                aligned_vread(buf, start + offset, n);

The whole vread() interface is rather suboptimal. The only user is proc,
which is trying to copy to userspace. But the vread() interface copies
to a kernel address, so kcore has to copy to a bounce buffer. That makes
this spinlock work, but the price is that we can't copy to a user address
in the future. Ideally, read_kcore() would be kcore_read_iter() and
we'd pass an iov_iter into vread(). vread() would then need to use a
mutex rather than a spinlock.

I don't think this needs to be done now, but if someone's looking for
a project ...
On Mon, Jan 16, 2023 at 07:01:33PM +0000, Matthew Wilcox wrote:
> On Fri, Jan 13, 2023 at 11:19:17AM +0800, Baoquan He wrote:
> [...]
>
> The whole vread() interface is rather suboptimal. The only user is proc,
> which is trying to copy to userspace. But the vread() interface copies
> to a kernel address, so kcore has to copy to a bounce buffer. That makes
> this spinlock work, but the price is that we can't copy to a user address
> in the future. Ideally, read_kcore() would be kcore_read_iter() and
> we'd pass an iov_iter into vread(). vread() would then need to use a
> mutex rather than a spinlock.
>
> I don't think this needs to be done now, but if someone's looking for
> a project ...

Interesting! I may take a look at this if I get the time :)
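
[ As a concrete illustration of the iov_iter direction discussed above:
a minimal sketch of what an iter-based read helper could look like. The
name aligned_vread_iter() and its exact shape are assumptions for this
sketch, not code posted in this thread; copy_page_to_iter(),
iov_iter_zero() and vmalloc_to_page() are existing kernel APIs. ]

/*
 * Hypothetical sketch only: copy a vmalloc range straight into an
 * iov_iter. copy_page_to_iter() can fault on the user buffer, which
 * is why vread() would first have to switch from the vmap_area_lock
 * spinlock to a mutex.
 */
static size_t aligned_vread_iter(struct iov_iter *iter,
                                 const char *addr, size_t count)
{
        size_t remains = count;

        while (remains) {
                unsigned long offset = offset_in_page(addr);
                size_t n = min(remains, (size_t)(PAGE_SIZE - offset));
                struct page *page = vmalloc_to_page(addr);
                size_t copied;

                if (page)
                        copied = copy_page_to_iter(page, offset, n, iter);
                else
                        copied = iov_iter_zero(n, iter); /* unmapped hole */

                addr += copied;
                remains -= copied;

                if (copied != n)
                        break; /* short copy: fault in the user buffer */
        }

        return count - remains;
}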
On Fri, Jan 13, 2023 at 11:19:17AM +0800, Baoquan He wrote:
[...]
> +        /*
> +         * Area is split into regions and tracked with vmap_block, read out
> +         * each region and zero fill the hole between regions.
> +         */
> +        vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
> +
> +        spin_lock(&vb->lock);
> +        if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
>
CPU-X invokes free_vmap_block() whereas we take the vb->lock and do
some manipulations with vb that might be already freed over RCU-core.

Should we protect it by the rcu_read_lock() also here?

--
Uladzislau Rezki
On 01/16/23 at 12:50pm, Uladzislau Rezki wrote:
> On Fri, Jan 13, 2023 at 11:19:17AM +0800, Baoquan He wrote:
> [...]
> > +        vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
> > +
> > +        spin_lock(&vb->lock);
> > +        if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) {
> >
> CPU-X invokes free_vmap_block() whereas we take the vb->lock and do
> some manipulations with vb that might be already freed over RCU-core.
>
> Should we protect it by the rcu_read_lock() also here?

I just went over the vb and vbq code again; it seems we don't need the
rcu_read_lock() here. The rcu lock is needed when operating on the
vmap_block_queue->free list. I don't see a race between the vb access
here and the list add or remove on vmap_block_queue->free under rcu.
If I missed some race window between them, please help point it out.

However, when I check free_vmap_block(), I do find a risk. As you said,
CPU-x invokes free_vmap_block() and executes xa_erase() to remove the
vb from the vmap_blocks tree. Then vread() comes into vmap_ram_vread()
and calls xa_load(), which would return NULL. I should check the
returned vb in vmap_ram_vread():

static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags)
{
        ......
        if (!(flags & VMAP_BLOCK)) {
                aligned_vread(buf, addr, count);
                return;
        }

        /*
         * Area is split into regions and tracked with vmap_block, read out
         * each region and zero fill the hole between regions.
         */
        vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
        if (!vb)    <-- vb needs to be checked here to avoid accessing an
                        erased vb from the vmap_blocks tree
                memset(buf, 0, count);
        ......
}
On 01/19/23 at 05:52pm, Baoquan He wrote:
> On 01/16/23 at 12:50pm, Uladzislau Rezki wrote:
> > CPU-X invokes free_vmap_block() whereas we take the vb->lock and do
> > some manipulations with vb that might be already freed over RCU-core.
> >
> > Should we protect it by the rcu_read_lock() also here?
>
> I just went over the vb and vbq code again; it seems we don't need the
> rcu_read_lock() here. The rcu lock is needed when operating on the
> vmap_block_queue->free list. I don't see a race between the vb access
> here and the list add or remove on vmap_block_queue->free under rcu.
> If I missed some race window between them, please help point it out.
>
> However, when I check free_vmap_block(), I do find a risk. As you said,

Forgot to add details about why there's no race between free_vmap_block()
and vmap_ram_vread(): we have taken vmap_area_lock at the beginning of
vread(). So, apart from the missing check on the value returned by
xa_load(), free_vmap_block() is either blocked waiting for vmap_area_lock
before calling unlink_va(), or has finished calling unlink_va() to remove
the vmap_area from the vmap_area_root tree. In both cases, no race
happens.

> CPU-x invokes free_vmap_block() and executes xa_erase() to remove the
> vb from the vmap_blocks tree. Then vread() comes into vmap_ram_vread()
> and calls xa_load(), which would return NULL. I should check the
> returned vb in vmap_ram_vread():
> [...]
> On 01/19/23 at 05:52pm, Baoquan He wrote:
> [...]
> Forgot to add details about why there's no race between free_vmap_block()
> and vmap_ram_vread(): we have taken vmap_area_lock at the beginning of
> vread(). So, apart from the missing check on the value returned by
> xa_load(), free_vmap_block() is either blocked waiting for vmap_area_lock
> before calling unlink_va(), or has finished calling unlink_va() to remove
> the vmap_area from the vmap_area_root tree. In both cases, no race
> happens.

Agree. xa_load()'s return value should be checked, because it can be
that there is no vmap_block associated with an address if xa_erase()
was done earlier.

--
Uladzislau Rezki
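
[ Folding the agreed check into the posted patch, the lookup path in
vmap_ram_vread() would become roughly the following: a sketch of the fix
being discussed, not a formally posted v4. ]

        /*
         * Area is split into regions and tracked with vmap_block, read out
         * each region and zero fill the hole between regions.
         */
        vb = xa_load(&vmap_blocks, addr_to_vb_idx((unsigned long)addr));
        if (!vb) {
                /*
                 * free_vmap_block() may already have erased this vb from
                 * the vmap_blocks tree; vmap_area_lock, held by vread(),
                 * keeps everything else stable, so just report the whole
                 * range as a hole.
                 */
                memset(buf, 0, count);
                return;
        }

        spin_lock(&vb->lock);
        ......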
Hi Baoquan,

url:    https://github.com/intel-lab-lkp/linux/commits/Baoquan-He/mm-vmalloc-c-add-used_map-into-vmap_block-to-track-space-of-vmap_block/20230113-112149
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20230113031921.64716-4-bhe%40redhat.com
patch subject: [PATCH v3 3/7] mm/vmalloc.c: allow vread() to read out vm_map_ram areas
config: i386-randconfig-m021
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <error27@gmail.com>

smatch warnings:
mm/vmalloc.c:3682 vread() error: we previously assumed 'vm' could be null (see line 3664)

vim +/vm +3682 mm/vmalloc.c

^1da177e4c3f415 Linus Torvalds           2005-04-16  3630  long vread(char *buf, char *addr, unsigned long count)
^1da177e4c3f415 Linus Torvalds           2005-04-16  3631  {
e81ce85f960c2e2 Joonsoo Kim              2013-04-29  3632          struct vmap_area *va;
e81ce85f960c2e2 Joonsoo Kim              2013-04-29  3633          struct vm_struct *vm;
^1da177e4c3f415 Linus Torvalds           2005-04-16  3634          char *vaddr, *buf_start = buf;
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3635          unsigned long buflen = count;
129dbdf298d7383 Baoquan He               2023-01-13  3636          unsigned long n, size, flags;
^1da177e4c3f415 Linus Torvalds           2005-04-16  3637  
4aff1dc4fb3a5a3 Andrey Konovalov         2022-03-24  3638          addr = kasan_reset_tag(addr);
4aff1dc4fb3a5a3 Andrey Konovalov         2022-03-24  3639  
^1da177e4c3f415 Linus Torvalds           2005-04-16  3640          /* Don't allow overflow */
^1da177e4c3f415 Linus Torvalds           2005-04-16  3641          if ((unsigned long) addr + count < count)
^1da177e4c3f415 Linus Torvalds           2005-04-16  3642                  count = -(unsigned long) addr;
^1da177e4c3f415 Linus Torvalds           2005-04-16  3643  
e81ce85f960c2e2 Joonsoo Kim              2013-04-29  3644          spin_lock(&vmap_area_lock);
f181234a5a21fd0 Chen Wandun              2021-09-02  3645          va = find_vmap_area_exceed_addr((unsigned long)addr);
f608788cd2d6cae Serapheim Dimitropoulos  2021-04-29  3646          if (!va)
f608788cd2d6cae Serapheim Dimitropoulos  2021-04-29  3647                  goto finished;
f181234a5a21fd0 Chen Wandun              2021-09-02  3648  
f181234a5a21fd0 Chen Wandun              2021-09-02  3649          /* no intersects with alive vmap_area */
f181234a5a21fd0 Chen Wandun              2021-09-02  3650          if ((unsigned long)addr + count <= va->va_start)
f181234a5a21fd0 Chen Wandun              2021-09-02  3651                  goto finished;
f181234a5a21fd0 Chen Wandun              2021-09-02  3652  
f608788cd2d6cae Serapheim Dimitropoulos  2021-04-29  3653          list_for_each_entry_from(va, &vmap_area_list, list) {
e81ce85f960c2e2 Joonsoo Kim              2013-04-29  3654                  if (!count)
e81ce85f960c2e2 Joonsoo Kim              2013-04-29  3655                          break;
e81ce85f960c2e2 Joonsoo Kim              2013-04-29  3656  
129dbdf298d7383 Baoquan He               2023-01-13  3657                  vm = va->vm;
129dbdf298d7383 Baoquan He               2023-01-13  3658                  flags = va->flags & VMAP_FLAGS_MASK;
129dbdf298d7383 Baoquan He               2023-01-13  3659  
129dbdf298d7383 Baoquan He               2023-01-13  3660                  if (!vm && !flags)
                                                                                ^^^
vm can be NULL if a flag in VMAP_FLAGS_MASK is set.

e81ce85f960c2e2 Joonsoo Kim              2013-04-29  3661                          continue;
e81ce85f960c2e2 Joonsoo Kim              2013-04-29  3662  
129dbdf298d7383 Baoquan He               2023-01-13  3663                  vaddr = (char *) va->va_start;
129dbdf298d7383 Baoquan He               2023-01-13 @3664                  size = vm ? get_vm_area_size(vm) : va_size(va);
                                                                                   ^^
129dbdf298d7383 Baoquan He               2023-01-13  3665  
129dbdf298d7383 Baoquan He               2023-01-13  3666                  if (addr >= vaddr + size)
^1da177e4c3f415 Linus Torvalds           2005-04-16  3667                          continue;
^1da177e4c3f415 Linus Torvalds           2005-04-16  3668                  while (addr < vaddr) {
^1da177e4c3f415 Linus Torvalds           2005-04-16  3669                          if (count == 0)
^1da177e4c3f415 Linus Torvalds           2005-04-16  3670                                  goto finished;
^1da177e4c3f415 Linus Torvalds           2005-04-16  3671                          *buf = '\0';
^1da177e4c3f415 Linus Torvalds           2005-04-16  3672                          buf++;
^1da177e4c3f415 Linus Torvalds           2005-04-16  3673                          addr++;
^1da177e4c3f415 Linus Torvalds           2005-04-16  3674                          count--;
^1da177e4c3f415 Linus Torvalds           2005-04-16  3675                  }
129dbdf298d7383 Baoquan He               2023-01-13  3676                  n = vaddr + size - addr;
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3677                  if (n > count)
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3678                          n = count;
129dbdf298d7383 Baoquan He               2023-01-13  3679  
129dbdf298d7383 Baoquan He               2023-01-13  3680                  if (flags & VMAP_RAM)

assume VMAP_RAM is not set

129dbdf298d7383 Baoquan He               2023-01-13  3681                          vmap_ram_vread(buf, addr, n, flags);
129dbdf298d7383 Baoquan He               2023-01-13 @3682                  else if (!(vm->flags & VM_IOREMAP))
                                                                                       ^^^^^^^^^
Unchecked dereference. Should this be "flags" instead of "vm->flags"?

d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3683                          aligned_vread(buf, addr, n);
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3684                  else /* IOREMAP area is treated as memory hole */
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3685                          memset(buf, 0, n);
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3686                  buf += n;
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3687                  addr += n;
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3688                  count -= n;
^1da177e4c3f415 Linus Torvalds           2005-04-16  3689          }
^1da177e4c3f415 Linus Torvalds           2005-04-16  3690  finished:
e81ce85f960c2e2 Joonsoo Kim              2013-04-29  3691          spin_unlock(&vmap_area_lock);
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3692  
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3693          if (buf == buf_start)
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3694                  return 0;
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3695          /* zero-fill memory holes */
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3696          if (buf != buf_start + buflen)
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3697                  memset(buf, 0, buflen - (buf - buf_start));
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3698  
d0107eb07320b5d KAMEZAWA Hiroyuki        2009-09-21  3699          return buflen;
^1da177e4c3f415 Linus Torvalds           2005-04-16  3700  }

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests
Hi Dan,

On 01/14/23 at 10:57am, Dan Carpenter wrote:
> Hi Baoquan,
>
> url:    https://github.com/intel-lab-lkp/linux/commits/Baoquan-He/mm-vmalloc-c-add-used_map-into-vmap_block-to-track-space-of-vmap_block/20230113-112149
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> patch link:    https://lore.kernel.org/r/20230113031921.64716-4-bhe%40redhat.com
> patch subject: [PATCH v3 3/7] mm/vmalloc.c: allow vread() to read out vm_map_ram areas
>
> smatch warnings:
> mm/vmalloc.c:3682 vread() error: we previously assumed 'vm' could be null (see line 3664)

Thanks for checking this. I went through the code flow again and
personally think the reported issue and risk cannot exist. Please see
the comments below.

> 129dbdf298d7383 Baoquan He      2023-01-13  3660                  if (!vm && !flags)
>                                                                       ^^^
> vm can be NULL if a flag in VMAP_FLAGS_MASK is set.
>
> e81ce85f960c2e2 Joonsoo Kim     2013-04-29  3661                          continue;

Right, after the 'continue;' line, only two cases can happen when we
get here: (vm != NULL), or (flags & VMAP_RAM) is true.

> 129dbdf298d7383 Baoquan He      2023-01-13  3680                  if (flags & VMAP_RAM)
>
> assume VMAP_RAM is not set

OK, then vm is not NULL.

> 129dbdf298d7383 Baoquan He      2023-01-13  3681                          vmap_ram_vread(buf, addr, n, flags);
> 129dbdf298d7383 Baoquan He      2023-01-13 @3682                  else if (!(vm->flags & VM_IOREMAP))
>                                                                              ^^^^^^^^^
> Unchecked dereference. Should this be "flags" instead of "vm->flags"?

Thus, here in the 'else if', vm is not NULL, and we are intentionally
checking vm->flags. I don't see an issue or risk here. Please help
check whether I missed anything.

Thanks
Baoquan
On Sun, Jan 15, 2023 at 10:08:55PM +0800, Baoquan He wrote:
> > 129dbdf298d7383 Baoquan He      2023-01-13  3660                  if (!vm && !flags)
> > [...]
> > e81ce85f960c2e2 Joonsoo Kim     2013-04-29  3661                          continue;
>
> Right, after the 'continue;' line, only two cases can happen when we
> get here: (vm != NULL), or (flags & VMAP_RAM) is true.

You're saying VMAP_RAM, but strictly speaking the code is checking
VMAP_FLAGS_MASK and not VMAP_RAM.

+#define VMAP_RAM                0x1 /* indicates vm_map_ram area*/
+#define VMAP_BLOCK              0x2 /* mark out the vmap_block sub-type*/
+#define VMAP_FLAGS_MASK         0x3

If we assume that vm is NULL, VMAP_BLOCK is set and VMAP_RAM is clear,
then it would lead to a NULL dereference. There might be reasons why
that combination is impossible outside the function, but we can't tell
that from the information we have here.

Which is fine; outside information is a common reason for false
positives with this check. But I was just concerned about the mix of
VMAP_FLAGS_MASK and VMAP_RAM.

regards,
dan carpenter
On 01/16/23 at 04:08pm, Dan Carpenter wrote:
> You're saying VMAP_RAM, but strictly speaking the code is checking
> VMAP_FLAGS_MASK and not VMAP_RAM.
>
> +#define VMAP_RAM                0x1 /* indicates vm_map_ram area*/
> +#define VMAP_BLOCK              0x2 /* mark out the vmap_block sub-type*/
> +#define VMAP_FLAGS_MASK         0x3
>
> If we assume that vm is NULL, VMAP_BLOCK is set and VMAP_RAM is clear,
> then it would lead to a NULL dereference. There might be reasons why
> that combination is impossible outside the function, but we can't tell
> that from the information we have here.

VMAP_BLOCK has no chance to be set alone. It has to be set together
with VMAP_RAM if needed.

> Which is fine; outside information is a common reason for false
> positives with this check. But I was just concerned about the mix of
> VMAP_FLAGS_MASK and VMAP_RAM.

Thanks, I see your point now. I will consider how to improve it.
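
[ One possible shape for that improvement, purely as an illustrative
sketch (not something posted in this thread, and the fix actually merged
may differ): rewrite the check in vread() so the VMAP_BLOCK-implies-
VMAP_RAM invariant is visible to static checkers. ]

                vm = va->vm;
                flags = va->flags & VMAP_FLAGS_MASK;

                /*
                 * VMAP_BLOCK is only ever set together with VMAP_RAM when
                 * a vmap_block sub-area is created, so testing VMAP_RAM
                 * here guarantees vm != NULL whenever the vm->flags
                 * branch further down is reached.
                 */
                if (!vm && !(flags & VMAP_RAM))
                        continue;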