accel/tcg/translate-all.c | 8 +++++-- bsd-user/mmap.c | 8 +++---- include/exec/cpu-all.h | 11 ++++++++- linux-user/elfload.c | 3 ++- linux-user/mmap.c | 49 +++++++++++++++++++++++++++++++++++---- linux-user/qemu.h | 1 + linux-user/syscall.c | 12 ++++------ 7 files changed, 72 insertions(+), 20 deletions(-)
Most flags to madvise() are just hints, so typically ignoring the
syscall and returning okay is fine. However applications exist that do
rely on MADV_DONTNEED behavior to guarantee that upon subsequent access
the mapping is refreshed from the backing file or zero for anonymous
mappings. The file backed mappings this is tricky - hence the original
comment - but for anonymous mappings it is safe to forward. We just need
to remember which those mappings are, which is now stored in the flags.
Cc: Riku Voipio <riku.voipio@iki.fi>
Cc: Laurent Vivier <laurent@vivier.eu>
Cc: Sami Nurmenniemi <sami.nurmenniemi@qt.io>
Signed-off-by: Simon Hausmann <simon.hausmann@qt.io>
--
v2:
- align start and len on host page size
v3:
- align start and len to target page size as it may be bigger than
host
- use immediate return in do_syscall
- forward MADV_DONTNEED advice only for anonymously mapped pages
- remember which mappings are anonymous in the page flags and preserve
those bits across mprotect calls.
---
accel/tcg/translate-all.c | 8 +++++--
bsd-user/mmap.c | 8 +++----
include/exec/cpu-all.h | 11 ++++++++-
linux-user/elfload.c | 3 ++-
linux-user/mmap.c | 49 +++++++++++++++++++++++++++++++++++----
linux-user/qemu.h | 1 +
linux-user/syscall.c | 12 ++++------
7 files changed, 72 insertions(+), 20 deletions(-)
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 898c3bb3d1..467fbd9aeb 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -2481,7 +2481,8 @@ int page_get_flags(target_ulong address)
/* Modify the flags of a page and invalidate the code if necessary.
The flag PAGE_WRITE_ORG is positioned automatically depending
on PAGE_WRITE. The mmap_lock should already be held. */
-void page_set_flags(target_ulong start, target_ulong end, int flags)
+void page_set_flags(target_ulong start, target_ulong end, int flags,
+ enum page_set_flags_mode mode)
{
target_ulong addr, len;
@@ -2513,7 +2514,10 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
p->first_tb) {
tb_invalidate_phys_page(addr, 0);
}
- p->flags = flags;
+ if (mode == PAGE_SET_ALL_FLAGS)
+ p->flags = flags;
+ else /* PAGE_SET_PROTECTION */
+ p->flags |= (p->flags & ~(PAGE_BITS - 1)) | (flags & PAGE_BITS);
}
}
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 17f4cd80aa..4bfac81af4 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -125,7 +125,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot)
if (ret != 0)
goto error;
}
- page_set_flags(start, start + len, prot | PAGE_VALID);
+ page_set_flags(start, start + len, prot, PAGE_SET_PROTECTION);
mmap_unlock();
return 0;
error:
@@ -214,7 +214,7 @@ static abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
to mmap. */
page_set_flags(last_brk & TARGET_PAGE_MASK,
TARGET_PAGE_ALIGN(new_brk),
- PAGE_RESERVED);
+ PAGE_RESERVED, PAGE_SET_ALL_FLAGS);
}
last_brk = new_brk;
@@ -397,7 +397,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
}
}
the_end1:
- page_set_flags(start, start + len, prot | PAGE_VALID);
+ page_set_flags(start, start + len, prot | PAGE_VALID, PAGE_SET_ALL_FLAGS);
the_end:
#ifdef DEBUG_MMAP
printf("ret=0x" TARGET_FMT_lx "\n", start);
@@ -460,7 +460,7 @@ int target_munmap(abi_ulong start, abi_ulong len)
}
if (ret == 0)
- page_set_flags(start, start + len, 0);
+ page_set_flags(start, start + len, 0, PAGE_SET_ALL_FLAGS);
mmap_unlock();
return ret;
}
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 117d2fbbca..100388dcd4 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -249,6 +249,9 @@ extern intptr_t qemu_host_page_mask;
/* FIXME: Code that sets/uses this is broken and needs to go away. */
#define PAGE_RESERVED 0x0020
#endif
+#if defined(CONFIG_LINUX) && defined(CONFIG_USER_ONLY)
+#define PAGE_MAP_ANONYMOUS 0x0080
+#endif
#if defined(CONFIG_USER_ONLY)
void page_dump(FILE *f);
@@ -257,8 +260,14 @@ typedef int (*walk_memory_regions_fn)(void *, target_ulong,
target_ulong, unsigned long);
int walk_memory_regions(void *, walk_memory_regions_fn);
+enum page_set_flags_mode {
+ PAGE_SET_ALL_FLAGS,
+ PAGE_SET_PROTECTION
+};
+
int page_get_flags(target_ulong address);
-void page_set_flags(target_ulong start, target_ulong end, int flags);
+void page_set_flags(target_ulong start, target_ulong end, int flags,
+ enum page_set_flags_mode);
int page_check_range(target_ulong start, target_ulong len, int flags);
#endif
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 8638612aec..c59cf4359c 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1702,7 +1702,8 @@ static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot)
/* Ensure that the bss page(s) are valid */
if ((page_get_flags(last_bss-1) & prot) != prot) {
- page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss, prot | PAGE_VALID);
+ page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss, prot,
+ PAGE_SET_PROTECTION);
}
if (host_start < host_map_start) {
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 41e0983ce8..fff29ee04b 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -124,7 +124,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot)
if (ret != 0)
goto error;
}
- page_set_flags(start, start + len, prot | PAGE_VALID);
+ page_set_flags(start, start + len, prot, PAGE_SET_PROTECTION);
mmap_unlock();
return 0;
error:
@@ -362,6 +362,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
int flags, int fd, abi_ulong offset)
{
abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
+ int page_flags;
mmap_lock();
#ifdef DEBUG_MMAP
@@ -562,7 +563,11 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
}
}
the_end1:
- page_set_flags(start, start + len, prot | PAGE_VALID);
+ page_flags = prot | PAGE_VALID;
+ if (flags & MAP_ANONYMOUS) {
+ page_flags |= PAGE_MAP_ANONYMOUS;
+ }
+ page_set_flags(start, start + len, page_flags, PAGE_SET_ALL_FLAGS);
the_end:
#ifdef DEBUG_MMAP
printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
@@ -675,7 +680,7 @@ int target_munmap(abi_ulong start, abi_ulong len)
}
if (ret == 0) {
- page_set_flags(start, start + len, 0);
+ page_set_flags(start, start + len, 0, PAGE_SET_ALL_FLAGS);
tb_invalidate_phys_range(start, start + len);
}
mmap_unlock();
@@ -755,10 +760,44 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
} else {
new_addr = h2g(host_addr);
prot = page_get_flags(old_addr);
- page_set_flags(old_addr, old_addr + old_size, 0);
- page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
+ page_set_flags(old_addr, old_addr + old_size, 0,
+ PAGE_SET_ALL_FLAGS);
+ page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID,
+ PAGE_SET_ALL_FLAGS);
}
tb_invalidate_phys_range(new_addr, new_addr + new_size);
mmap_unlock();
return new_addr;
}
+
+int target_madvise(abi_ulong start, abi_ulong len, int advice)
+{
+ abi_ulong end, addr;
+ int ret;
+
+ len = TARGET_PAGE_ALIGN(len);
+ start &= TARGET_PAGE_MASK;
+
+ if (!guest_range_valid(start, len)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* A straight passthrough may not be safe because qemu sometimes
+ turns private file-backed mappings into anonymous mappings.
+ Most flags are hints so we ignore them.
+ One exception is made for MADV_DONTNEED on anonymous mappings,
+ that applications may rely on to zero out pages. */
+ if (advice & MADV_DONTNEED) {
+ end = start + len;
+ for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
+ if (page_get_flags(addr) & PAGE_MAP_ANONYMOUS) {
+ ret = madvise(g2h(addr), TARGET_PAGE_SIZE, MADV_DONTNEED);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+ }
+ }
+ return 0;
+}
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index b4959e41c6..e5ac5e9c6a 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -437,6 +437,7 @@ int target_munmap(abi_ulong start, abi_ulong len);
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
abi_ulong new_size, unsigned long flags,
abi_ulong new_addr);
+int target_madvise(abi_ulong start, abi_ulong len, int advice);
extern unsigned long last_brk;
extern abi_ulong mmap_next_start;
abi_ulong mmap_find_vma(abi_ulong, abi_ulong);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 850b72a0c7..4478de5fc8 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -5124,7 +5124,8 @@ static inline abi_ulong do_shmat(CPUArchState *cpu_env,
page_set_flags(raddr, raddr + shm_info.shm_segsz,
PAGE_VALID | PAGE_READ |
- ((shmflg & SHM_RDONLY)? 0 : PAGE_WRITE));
+ ((shmflg & SHM_RDONLY) ? 0 : PAGE_WRITE),
+ PAGE_SET_ALL_FLAGS);
for (i = 0; i < N_SHM_REGIONS; i++) {
if (!shm_regions[i].in_use) {
@@ -5150,7 +5151,8 @@ static inline abi_long do_shmdt(abi_ulong shmaddr)
for (i = 0; i < N_SHM_REGIONS; ++i) {
if (shm_regions[i].in_use && shm_regions[i].start == shmaddr) {
shm_regions[i].in_use = false;
- page_set_flags(shmaddr, shmaddr + shm_regions[i].size, 0);
+ page_set_flags(shmaddr, shmaddr + shm_regions[i].size, 0,
+ PAGE_SET_ALL_FLAGS);
break;
}
}
@@ -11559,11 +11561,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1,
#ifdef TARGET_NR_madvise
case TARGET_NR_madvise:
- /* A straight passthrough may not be safe because qemu sometimes
- turns private file-backed mappings into anonymous mappings.
- This will break MADV_DONTNEED.
- This is a hint, so ignoring and returning success is ok. */
- return 0;
+ return get_errno(target_madvise(arg1, arg2, arg3));
#endif
#if TARGET_ABI_BITS == 32
case TARGET_NR_fcntl64:
--
2.17.1
Le 27/08/2018 à 10:40, Simon Hausmann a écrit :
> Most flags to madvise() are just hints, so typically ignoring the
> syscall and returning okay is fine. However applications exist that do
> rely on MADV_DONTNEED behavior to guarantee that upon subsequent access
> the mapping is refreshed from the backing file or zero for anonymous
> mappings. The file backed mappings this is tricky - hence the original
> comment - but for anonymous mappings it is safe to forward. We just need
> to remember which those mappings are, which is now stored in the flags.
>
> Cc: Riku Voipio <riku.voipio@iki.fi>
> Cc: Laurent Vivier <laurent@vivier.eu>
> Cc: Sami Nurmenniemi <sami.nurmenniemi@qt.io>
> Signed-off-by: Simon Hausmann <simon.hausmann@qt.io>
>
> --
> v2:
> - align start and len on host page size
> v3:
> - align start and len to target page size as it may be bigger than
> host
> - use immediate return in do_syscall
> - forward MADV_DONTNEED advice only for anonymously mapped pages
> - remember which mappings are anonymous in the page flags and preserve
> those bits across mprotect calls.
> ---
> accel/tcg/translate-all.c | 8 +++++--
> bsd-user/mmap.c | 8 +++----
> include/exec/cpu-all.h | 11 ++++++++-
> linux-user/elfload.c | 3 ++-
> linux-user/mmap.c | 49 +++++++++++++++++++++++++++++++++++----
> linux-user/qemu.h | 1 +
> linux-user/syscall.c | 12 ++++------
> 7 files changed, 72 insertions(+), 20 deletions(-)
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 898c3bb3d1..467fbd9aeb 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -2481,7 +2481,8 @@ int page_get_flags(target_ulong address)
> /* Modify the flags of a page and invalidate the code if necessary.
> The flag PAGE_WRITE_ORG is positioned automatically depending
> on PAGE_WRITE. The mmap_lock should already be held. */
> -void page_set_flags(target_ulong start, target_ulong end, int flags)
> +void page_set_flags(target_ulong start, target_ulong end, int flags,
> + enum page_set_flags_mode mode)
Is it possible to not add the mode, and only use "flags" to set the new
flag? Using page_get_flags() to keep flags that needed to be kept?
> {
> target_ulong addr, len;
>
> @@ -2513,7 +2514,10 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
> p->first_tb) {
> tb_invalidate_phys_page(addr, 0);
> }
> - p->flags = flags;
> + if (mode == PAGE_SET_ALL_FLAGS)
> + p->flags = flags;
> + else /* PAGE_SET_PROTECTION */
> + p->flags |= (p->flags & ~(PAGE_BITS - 1)) | (flags & PAGE_BITS);
> }
> }
>
> diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
> index 17f4cd80aa..4bfac81af4 100644
> --- a/bsd-user/mmap.c
> +++ b/bsd-user/mmap.c
> @@ -125,7 +125,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot)
> if (ret != 0)
> goto error;
> }
> - page_set_flags(start, start + len, prot | PAGE_VALID);
> + page_set_flags(start, start + len, prot, PAGE_SET_PROTECTION);
Why do you remove the PAGE_VALID flag?
...
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index 8638612aec..c59cf4359c 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -1702,7 +1702,8 @@ static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot)
>
> /* Ensure that the bss page(s) are valid */
> if ((page_get_flags(last_bss-1) & prot) != prot) {
> - page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss, prot | PAGE_VALID);
> + page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss, prot,
> + PAGE_SET_PROTECTION);
> }
PAGE_VALID?
>
> if (host_start < host_map_start) {
> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> index 41e0983ce8..fff29ee04b 100644
> --- a/linux-user/mmap.c
> +++ b/linux-user/mmap.c
> @@ -124,7 +124,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot)
> if (ret != 0)
> goto error;
> }
> - page_set_flags(start, start + len, prot | PAGE_VALID);
> + page_set_flags(start, start + len, prot, PAGE_SET_PROTECTION);
> mmap_unlock();
PAGE_VALID?
...
> +int target_madvise(abi_ulong start, abi_ulong len, int advice)
> +{
> + abi_ulong end, addr;
> + int ret;
> +
> + len = TARGET_PAGE_ALIGN(len);
> + start &= TARGET_PAGE_MASK;
> +
> + if (!guest_range_valid(start, len)) {
> + errno = EINVAL;
> + return -1;
> + }
> +
> + /* A straight passthrough may not be safe because qemu sometimes
> + turns private file-backed mappings into anonymous mappings.
> + Most flags are hints so we ignore them.
> + One exception is made for MADV_DONTNEED on anonymous mappings,
> + that applications may rely on to zero out pages. */
> + if (advice & MADV_DONTNEED) {
> + end = start + len;
> + for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
> + if (page_get_flags(addr) & PAGE_MAP_ANONYMOUS) {
> + ret = madvise(g2h(addr), TARGET_PAGE_SIZE, MADV_DONTNEED);
these values must be aligned on host page size.
Thanks,
Laurent
On 9/6/18 11:34 AM, Laurent Vivier wrote:
> Le 27/08/2018 à 10:40, Simon Hausmann a écrit :
>> Most flags to madvise() are just hints, so typically ignoring the
>> syscall and returning okay is fine. However applications exist that do
>> rely on MADV_DONTNEED behavior to guarantee that upon subsequent access
>> the mapping is refreshed from the backing file or zero for anonymous
>> mappings. The file backed mappings this is tricky - hence the original
>> comment - but for anonymous mappings it is safe to forward. We just need
>> to remember which those mappings are, which is now stored in the flags.
>>
>> Cc: Riku Voipio <riku.voipio@iki.fi>
>> Cc: Laurent Vivier <laurent@vivier.eu>
>> Cc: Sami Nurmenniemi <sami.nurmenniemi@qt.io>
>> Signed-off-by: Simon Hausmann <simon.hausmann@qt.io>
>>
>> --
>> v2:
>> - align start and len on host page size
>> v3:
>> - align start and len to target page size as it may be bigger than
>> host
>> - use immediate return in do_syscall
>> - forward MADV_DONTNEED advice only for anonymously mapped pages
>> - remember which mappings are anonymous in the page flags and preserve
>> those bits across mprotect calls.
>> ---
>> accel/tcg/translate-all.c | 8 +++++--
>> bsd-user/mmap.c | 8 +++----
>> include/exec/cpu-all.h | 11 ++++++++-
>> linux-user/elfload.c | 3 ++-
>> linux-user/mmap.c | 49 +++++++++++++++++++++++++++++++++++----
>> linux-user/qemu.h | 1 +
>> linux-user/syscall.c | 12 ++++------
>> 7 files changed, 72 insertions(+), 20 deletions(-)
>>
>> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
>> index 898c3bb3d1..467fbd9aeb 100644
>> --- a/accel/tcg/translate-all.c
>> +++ b/accel/tcg/translate-all.c
>> @@ -2481,7 +2481,8 @@ int page_get_flags(target_ulong address)
>> /* Modify the flags of a page and invalidate the code if necessary.
>> The flag PAGE_WRITE_ORG is positioned automatically depending
>> on PAGE_WRITE. The mmap_lock should already be held. */
>> -void page_set_flags(target_ulong start, target_ulong end, int flags)
>> +void page_set_flags(target_ulong start, target_ulong end, int flags,
>> + enum page_set_flags_mode mode)
> Is it possible to not add the mode, and only use "flags" to set the new
> flag? Using page_get_flags() to keep flags that needed to be kept?
Hm, the reason why I didn't do that is because page_set_flags operates
on a range while page_get_flags() gets the flags for only one page.
If you prefer not to have the mode, then I can see only two other options:
(1) Assume that the flags are typically homogeneous and use
something like
page_set_flags(start, end, page_get_flags(start) | whatever)
(2) Change the call sites to not use the range setting ability of
page_set_flags but
loop itself.
I don't like either, so I went for the mode. But I can change to any way
you prefer.
>> {
>> target_ulong addr, len;
>>
>> @@ -2513,7 +2514,10 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
>> p->first_tb) {
>> tb_invalidate_phys_page(addr, 0);
>> }
>> - p->flags = flags;
>> + if (mode == PAGE_SET_ALL_FLAGS)
>> + p->flags = flags;
>> + else /* PAGE_SET_PROTECTION */
>> + p->flags |= (p->flags & ~(PAGE_BITS - 1)) | (flags & PAGE_BITS);
>> }
>> }
>>
>> diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
>> index 17f4cd80aa..4bfac81af4 100644
>> --- a/bsd-user/mmap.c
>> +++ b/bsd-user/mmap.c
>> @@ -125,7 +125,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot)
>> if (ret != 0)
>> goto error;
>> }
>> - page_set_flags(start, start + len, prot | PAGE_VALID);
>> + page_set_flags(start, start + len, prot, PAGE_SET_PROTECTION);
> Why do you remove the PAGE_VALID flag?
As the mode set PAGE_SET_PROTECTION, the page_set_flags call only
affects the protection
bits and the existing PAGE_VALID is preserved. I believe PAGE_VALID must
be set for that range.
If that assumption is wrong, then I could change page_set_flags to
implicitly set PAGE_VALID if the
mode is to apply new protection flags. Would that be ok?
> ...
>> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
>> index 8638612aec..c59cf4359c 100644
>> --- a/linux-user/elfload.c
>> +++ b/linux-user/elfload.c
>> @@ -1702,7 +1702,8 @@ static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot)
>>
>> /* Ensure that the bss page(s) are valid */
>> if ((page_get_flags(last_bss-1) & prot) != prot) {
>> - page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss, prot | PAGE_VALID);
>> + page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss, prot,
>> + PAGE_SET_PROTECTION);
>> }
> PAGE_VALID?
Oh! You're right, here I mistakenly thought the pages are already marked
valid, but
it appears that this is a new mapping, so I'll fix that to set PAGE_VALID.
>>
>> if (host_start < host_map_start) {
>> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
>> index 41e0983ce8..fff29ee04b 100644
>> --- a/linux-user/mmap.c
>> +++ b/linux-user/mmap.c
>> @@ -124,7 +124,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot)
>> if (ret != 0)
>> goto error;
>> }
>> - page_set_flags(start, start + len, prot | PAGE_VALID);
>> + page_set_flags(start, start + len, prot, PAGE_SET_PROTECTION);
>> mmap_unlock();
> PAGE_VALID?
>
> ...
Same - potentially wrong - reasoning as with the other mprotect() wrapper.
>> +int target_madvise(abi_ulong start, abi_ulong len, int advice)
>> +{
>> + abi_ulong end, addr;
>> + int ret;
>> +
>> + len = TARGET_PAGE_ALIGN(len);
>> + start &= TARGET_PAGE_MASK;
>> +
>> + if (!guest_range_valid(start, len)) {
>> + errno = EINVAL;
>> + return -1;
>> + }
>> +
>> + /* A straight passthrough may not be safe because qemu sometimes
>> + turns private file-backed mappings into anonymous mappings.
>> + Most flags are hints so we ignore them.
>> + One exception is made for MADV_DONTNEED on anonymous mappings,
>> + that applications may rely on to zero out pages. */
>> + if (advice & MADV_DONTNEED) {
>> + end = start + len;
>> + for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
>> + if (page_get_flags(addr) & PAGE_MAP_ANONYMOUS) {
>> + ret = madvise(g2h(addr), TARGET_PAGE_SIZE, MADV_DONTNEED);
> these values must be aligned on host page size.
>
Sorry, I think I assumed from the earlier statement that the target page
size may be bigger than the
host and that that would be sufficient for host alignment. I didn't
consider that the reverse may also apply.
Would the following be correct?
(1) Use target aligned start/len for guest_range_valid call
(2) Use host aligned start/end/increment for the loop (including
the page_get_flags call parameters).
Thanks for the feedback :)
Simon
Ping :)
On 8/27/18 10:40 AM, Simon Hausmann wrote:
> Most flags to madvise() are just hints, so typically ignoring the
> syscall and returning okay is fine. However applications exist that do
> rely on MADV_DONTNEED behavior to guarantee that upon subsequent access
> the mapping is refreshed from the backing file or zero for anonymous
> mappings. The file backed mappings this is tricky - hence the original
> comment - but for anonymous mappings it is safe to forward. We just need
> to remember which those mappings are, which is now stored in the flags.
>
> Cc: Riku Voipio <riku.voipio@iki.fi>
> Cc: Laurent Vivier <laurent@vivier.eu>
> Cc: Sami Nurmenniemi <sami.nurmenniemi@qt.io>
> Signed-off-by: Simon Hausmann <simon.hausmann@qt.io>
>
> --
> v2:
> - align start and len on host page size
> v3:
> - align start and len to target page size as it may be bigger than
> host
> - use immediate return in do_syscall
> - forward MADV_DONTNEED advice only for anonymously mapped pages
> - remember which mappings are anonymous in the page flags and preserve
> those bits across mprotect calls.
> ---
> accel/tcg/translate-all.c | 8 +++++--
> bsd-user/mmap.c | 8 +++----
> include/exec/cpu-all.h | 11 ++++++++-
> linux-user/elfload.c | 3 ++-
> linux-user/mmap.c | 49 +++++++++++++++++++++++++++++++++++----
> linux-user/qemu.h | 1 +
> linux-user/syscall.c | 12 ++++------
> 7 files changed, 72 insertions(+), 20 deletions(-)
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 898c3bb3d1..467fbd9aeb 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -2481,7 +2481,8 @@ int page_get_flags(target_ulong address)
> /* Modify the flags of a page and invalidate the code if necessary.
> The flag PAGE_WRITE_ORG is positioned automatically depending
> on PAGE_WRITE. The mmap_lock should already be held. */
> -void page_set_flags(target_ulong start, target_ulong end, int flags)
> +void page_set_flags(target_ulong start, target_ulong end, int flags,
> + enum page_set_flags_mode mode)
> {
> target_ulong addr, len;
>
> @@ -2513,7 +2514,10 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
> p->first_tb) {
> tb_invalidate_phys_page(addr, 0);
> }
> - p->flags = flags;
> + if (mode == PAGE_SET_ALL_FLAGS)
> + p->flags = flags;
> + else /* PAGE_SET_PROTECTION */
> + p->flags |= (p->flags & ~(PAGE_BITS - 1)) | (flags & PAGE_BITS);
> }
> }
>
> diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
> index 17f4cd80aa..4bfac81af4 100644
> --- a/bsd-user/mmap.c
> +++ b/bsd-user/mmap.c
> @@ -125,7 +125,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot)
> if (ret != 0)
> goto error;
> }
> - page_set_flags(start, start + len, prot | PAGE_VALID);
> + page_set_flags(start, start + len, prot, PAGE_SET_PROTECTION);
> mmap_unlock();
> return 0;
> error:
> @@ -214,7 +214,7 @@ static abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
> to mmap. */
> page_set_flags(last_brk & TARGET_PAGE_MASK,
> TARGET_PAGE_ALIGN(new_brk),
> - PAGE_RESERVED);
> + PAGE_RESERVED, PAGE_SET_ALL_FLAGS);
> }
> last_brk = new_brk;
>
> @@ -397,7 +397,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
> }
> }
> the_end1:
> - page_set_flags(start, start + len, prot | PAGE_VALID);
> + page_set_flags(start, start + len, prot | PAGE_VALID, PAGE_SET_ALL_FLAGS);
> the_end:
> #ifdef DEBUG_MMAP
> printf("ret=0x" TARGET_FMT_lx "\n", start);
> @@ -460,7 +460,7 @@ int target_munmap(abi_ulong start, abi_ulong len)
> }
>
> if (ret == 0)
> - page_set_flags(start, start + len, 0);
> + page_set_flags(start, start + len, 0, PAGE_SET_ALL_FLAGS);
> mmap_unlock();
> return ret;
> }
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 117d2fbbca..100388dcd4 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -249,6 +249,9 @@ extern intptr_t qemu_host_page_mask;
> /* FIXME: Code that sets/uses this is broken and needs to go away. */
> #define PAGE_RESERVED 0x0020
> #endif
> +#if defined(CONFIG_LINUX) && defined(CONFIG_USER_ONLY)
> +#define PAGE_MAP_ANONYMOUS 0x0080
> +#endif
>
> #if defined(CONFIG_USER_ONLY)
> void page_dump(FILE *f);
> @@ -257,8 +260,14 @@ typedef int (*walk_memory_regions_fn)(void *, target_ulong,
> target_ulong, unsigned long);
> int walk_memory_regions(void *, walk_memory_regions_fn);
>
> +enum page_set_flags_mode {
> + PAGE_SET_ALL_FLAGS,
> + PAGE_SET_PROTECTION
> +};
> +
> int page_get_flags(target_ulong address);
> -void page_set_flags(target_ulong start, target_ulong end, int flags);
> +void page_set_flags(target_ulong start, target_ulong end, int flags,
> + enum page_set_flags_mode);
> int page_check_range(target_ulong start, target_ulong len, int flags);
> #endif
>
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index 8638612aec..c59cf4359c 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -1702,7 +1702,8 @@ static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot)
>
> /* Ensure that the bss page(s) are valid */
> if ((page_get_flags(last_bss-1) & prot) != prot) {
> - page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss, prot | PAGE_VALID);
> + page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss, prot,
> + PAGE_SET_PROTECTION);
> }
>
> if (host_start < host_map_start) {
> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> index 41e0983ce8..fff29ee04b 100644
> --- a/linux-user/mmap.c
> +++ b/linux-user/mmap.c
> @@ -124,7 +124,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot)
> if (ret != 0)
> goto error;
> }
> - page_set_flags(start, start + len, prot | PAGE_VALID);
> + page_set_flags(start, start + len, prot, PAGE_SET_PROTECTION);
> mmap_unlock();
> return 0;
> error:
> @@ -362,6 +362,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
> int flags, int fd, abi_ulong offset)
> {
> abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
> + int page_flags;
>
> mmap_lock();
> #ifdef DEBUG_MMAP
> @@ -562,7 +563,11 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
> }
> }
> the_end1:
> - page_set_flags(start, start + len, prot | PAGE_VALID);
> + page_flags = prot | PAGE_VALID;
> + if (flags & MAP_ANONYMOUS) {
> + page_flags |= PAGE_MAP_ANONYMOUS;
> + }
> + page_set_flags(start, start + len, page_flags, PAGE_SET_ALL_FLAGS);
> the_end:
> #ifdef DEBUG_MMAP
> printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
> @@ -675,7 +680,7 @@ int target_munmap(abi_ulong start, abi_ulong len)
> }
>
> if (ret == 0) {
> - page_set_flags(start, start + len, 0);
> + page_set_flags(start, start + len, 0, PAGE_SET_ALL_FLAGS);
> tb_invalidate_phys_range(start, start + len);
> }
> mmap_unlock();
> @@ -755,10 +760,44 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
> } else {
> new_addr = h2g(host_addr);
> prot = page_get_flags(old_addr);
> - page_set_flags(old_addr, old_addr + old_size, 0);
> - page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
> + page_set_flags(old_addr, old_addr + old_size, 0,
> + PAGE_SET_ALL_FLAGS);
> + page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID,
> + PAGE_SET_ALL_FLAGS);
> }
> tb_invalidate_phys_range(new_addr, new_addr + new_size);
> mmap_unlock();
> return new_addr;
> }
> +
> +int target_madvise(abi_ulong start, abi_ulong len, int advice)
> +{
> + abi_ulong end, addr;
> + int ret;
> +
> + len = TARGET_PAGE_ALIGN(len);
> + start &= TARGET_PAGE_MASK;
> +
> + if (!guest_range_valid(start, len)) {
> + errno = EINVAL;
> + return -1;
> + }
> +
> + /* A straight passthrough may not be safe because qemu sometimes
> + turns private file-backed mappings into anonymous mappings.
> + Most flags are hints so we ignore them.
> + One exception is made for MADV_DONTNEED on anonymous mappings,
> + that applications may rely on to zero out pages. */
> + if (advice & MADV_DONTNEED) {
> + end = start + len;
> + for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
> + if (page_get_flags(addr) & PAGE_MAP_ANONYMOUS) {
> + ret = madvise(g2h(addr), TARGET_PAGE_SIZE, MADV_DONTNEED);
> + if (ret != 0) {
> + return ret;
> + }
> + }
> + }
> + }
> + return 0;
> +}
> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> index b4959e41c6..e5ac5e9c6a 100644
> --- a/linux-user/qemu.h
> +++ b/linux-user/qemu.h
> @@ -437,6 +437,7 @@ int target_munmap(abi_ulong start, abi_ulong len);
> abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
> abi_ulong new_size, unsigned long flags,
> abi_ulong new_addr);
> +int target_madvise(abi_ulong start, abi_ulong len, int advice);
> extern unsigned long last_brk;
> extern abi_ulong mmap_next_start;
> abi_ulong mmap_find_vma(abi_ulong, abi_ulong);
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 850b72a0c7..4478de5fc8 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -5124,7 +5124,8 @@ static inline abi_ulong do_shmat(CPUArchState *cpu_env,
>
> page_set_flags(raddr, raddr + shm_info.shm_segsz,
> PAGE_VALID | PAGE_READ |
> - ((shmflg & SHM_RDONLY)? 0 : PAGE_WRITE));
> + ((shmflg & SHM_RDONLY) ? 0 : PAGE_WRITE),
> + PAGE_SET_ALL_FLAGS);
>
> for (i = 0; i < N_SHM_REGIONS; i++) {
> if (!shm_regions[i].in_use) {
> @@ -5150,7 +5151,8 @@ static inline abi_long do_shmdt(abi_ulong shmaddr)
> for (i = 0; i < N_SHM_REGIONS; ++i) {
> if (shm_regions[i].in_use && shm_regions[i].start == shmaddr) {
> shm_regions[i].in_use = false;
> - page_set_flags(shmaddr, shmaddr + shm_regions[i].size, 0);
> + page_set_flags(shmaddr, shmaddr + shm_regions[i].size, 0,
> + PAGE_SET_ALL_FLAGS);
> break;
> }
> }
> @@ -11559,11 +11561,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1,
>
> #ifdef TARGET_NR_madvise
> case TARGET_NR_madvise:
> - /* A straight passthrough may not be safe because qemu sometimes
> - turns private file-backed mappings into anonymous mappings.
> - This will break MADV_DONTNEED.
> - This is a hint, so ignoring and returning success is ok. */
> - return 0;
> + return get_errno(target_madvise(arg1, arg2, arg3));
> #endif
> #if TARGET_ABI_BITS == 32
> case TARGET_NR_fcntl64:
© 2016 - 2026 Red Hat, Inc.